import math

import gradio as gr
import tensorflow_hub as hub
import tensorflow_text  # noqa: F401 -- importing registers the SentencePiece ops the model needs
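
# Load the multilingual Universal Sentence Encoder (large, v3) from TF Hub.
# The first call downloads and caches the model, which may take a while.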
model_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3"
model = hub.load(model_url)


def embed_text(text: str) -> list:
    # The model expects a batch of strings, so wrap the single input in a list
    # and unwrap its one 512-dimensional embedding from the result.
    embeddings = model([text])
    return embeddings.numpy().tolist()[0]
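

# Gradio tab 1: show the raw embedding for a piece of text as JSON output.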
embed_text_inter = gr.Interface(
    fn=embed_text,
    inputs="text",
    outputs=gr.JSON(),
    title="Universal Sentence Encoder Multilingual Large v3",
)


def distance(text_1: str, text_2: str) -> float:
    # Euclidean distance between the two embedding vectors.
    embeddings_1 = embed_text(text_1)
    embeddings_2 = embed_text(text_2)
    return math.dist(embeddings_1, embeddings_2)
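

# The embeddings are approximately unit-length, so cosine similarity is a common
# alternative to Euclidean distance. A minimal sketch; cosine_similarity is a
# hypothetical helper, not wired into the Gradio demo:
def cosine_similarity(text_1: str, text_2: str) -> float:
    e1 = embed_text(text_1)
    e2 = embed_text(text_2)
    dot = sum(a * b for a, b in zip(e1, e2))
    norm_1 = math.sqrt(sum(a * a for a in e1))
    norm_2 = math.sqrt(sum(b * b for b in e2))
    return dot / (norm_1 * norm_2)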


# Gradio tab 2: Euclidean distance between the embeddings of two texts.
distance_inter = gr.Interface(
    fn=distance,
    inputs=["text", "text"],
    outputs="number",
    title="Universal Sentence Encoder Multilingual Large v3",
)


# Combine both demos into one tabbed app.
iface = gr.TabbedInterface(
    interface_list=[embed_text_inter, distance_inter],
    title="Universal Sentence Encoder Multilingual Large v3",
)
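
# launch() starts a local web server hosting the tabbed demo.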
iface.launch()