Update app.py
Browse files
app.py
CHANGED
@@ -78,6 +78,7 @@ def embedding_worker():
|
|
78 |
|
79 |
embedding_response_queue.put(formatted_response)
|
80 |
embedding_request_queue.task_done()
|
|
|
81 |
|
82 |
threading.Thread(target=embedding_worker, daemon=True).start()
|
83 |
|
@@ -100,6 +101,7 @@ def compute_embeddings(selected_task, input_text):
|
|
100 |
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
|
101 |
embeddings = F.normalize(embeddings, p=2, dim=1)
|
102 |
embeddings_list = embeddings.detach().cpu().numpy().tolist()
|
|
|
103 |
return embeddings_list
|
104 |
|
105 |
@spaces.GPU
|
@@ -130,6 +132,7 @@ def compute_similarity(selected_task, sentence1, sentence2, extra_sentence1, ext
|
|
130 |
free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
|
131 |
|
132 |
similarity_scores = {"Similarity 1-2": similarity1, "Similarity 1-3": similarity2, "Similarity 1-4": similarity3}
|
|
|
133 |
return similarity_scores
|
134 |
|
135 |
@spaces.GPU
|
@@ -138,6 +141,7 @@ def compute_cosine_similarity(emb1, emb2):
|
|
138 |
tensor2 = torch.tensor(emb2).to(device).half()
|
139 |
similarity = F.cosine_similarity(tensor1, tensor2).item()
|
140 |
free_memory(tensor1, tensor2)
|
|
|
141 |
return similarity
|
142 |
|
143 |
|
@@ -153,6 +157,7 @@ def compute_embeddings_batch(input_texts):
|
|
153 |
outputs = model(**batch_dict)
|
154 |
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
|
155 |
embeddings = F.normalize(embeddings, p=2, dim=1)
|
|
|
156 |
return embeddings.detach().cpu().numpy()
|
157 |
|
158 |
def semantic_search(query_embedding, corpus_embeddings, top_k=5):
|
@@ -188,6 +193,7 @@ def generate_and_format_embeddings(selected_task, input_text):
|
|
188 |
embedding_request_queue.put((selected_task, input_text))
|
189 |
response = embedding_response_queue.get()
|
190 |
embedding_response_queue.task_done()
|
|
|
191 |
return response
|
192 |
|
193 |
|
|
|
78 |
|
79 |
embedding_response_queue.put(formatted_response)
|
80 |
embedding_request_queue.task_done()
|
81 |
+
clear_cuda_cache()
|
82 |
|
83 |
threading.Thread(target=embedding_worker, daemon=True).start()
|
84 |
|
|
|
101 |
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
|
102 |
embeddings = F.normalize(embeddings, p=2, dim=1)
|
103 |
embeddings_list = embeddings.detach().cpu().numpy().tolist()
|
104 |
+
clear_cuda_cache()
|
105 |
return embeddings_list
|
106 |
|
107 |
@spaces.GPU
|
|
|
132 |
free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
|
133 |
|
134 |
similarity_scores = {"Similarity 1-2": similarity1, "Similarity 1-3": similarity2, "Similarity 1-4": similarity3}
|
135 |
+
clear_cuda_cache()
|
136 |
return similarity_scores
|
137 |
|
138 |
@spaces.GPU
|
|
|
141 |
tensor2 = torch.tensor(emb2).to(device).half()
|
142 |
similarity = F.cosine_similarity(tensor1, tensor2).item()
|
143 |
free_memory(tensor1, tensor2)
|
144 |
+
clear_cuda_cache()
|
145 |
return similarity
|
146 |
|
147 |
|
|
|
157 |
outputs = model(**batch_dict)
|
158 |
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
|
159 |
embeddings = F.normalize(embeddings, p=2, dim=1)
|
160 |
+
clear_cuda_cache()
|
161 |
return embeddings.detach().cpu().numpy()
|
162 |
|
163 |
def semantic_search(query_embedding, corpus_embeddings, top_k=5):
|
|
|
193 |
embedding_request_queue.put((selected_task, input_text))
|
194 |
response = embedding_response_queue.get()
|
195 |
embedding_response_queue.task_done()
|
196 |
+
clear_cuda_cache()
|
197 |
return response
|
198 |
|
199 |
|