Vectorize

Sleeping

0xalfroz committed on Sep 17, 2024

Commit

885a800

verified ·

1 Parent(s): d118cf4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,23 +8,25 @@ model = AutoModel.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 def text_to_vector(texts):
-    # Expect texts to be an array of sentences
     inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
     outputs = model(**inputs)
     vectors = outputs.pooler_output.detach().numpy()
-    # Convert each vector to a string representation
-    vector_strings = [", ".join(map(str, vector)) for vector in vectors]
-    return vector_strings
 demo = gr.Interface(
     fn=text_to_vector,
     inputs=gr.Textbox(label="Enter JSON array", placeholder="Enter an array of sentences as a JSON string"),
-    outputs=gr.Textbox(label="Text Vectors", lines=10),
-    title="Batch Text to Vector",
-    description="This demo converts an array of sentences to vectors."
 )
 demo.launch()

 # Load the tokenizer from the same checkpoint name (model_name) that the model is loaded from.
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 def text_to_vector(texts):
     """Embed sentences and pair each sentence with its vector.

     The UI feeds this function from a text box, which delivers one raw
     string rather than a list. A string argument is therefore decoded as
     JSON before tokenizing; without that step the whole JSON text would be
     embedded as a single sentence and paired with its first character.

     Args:
         texts: Either a sequence of sentences, or a string. A string holding
             a JSON array is expanded into its elements (each converted to
             ``str``); a JSON string literal or any non-JSON text is treated
             as one sentence.

     Returns:
         A list of ``{"sentence": ..., "vector": ...}`` dicts, one per
         sentence, where ``"vector"`` is the comma-separated string form of
         the corresponding ``pooler_output`` row. Blank or empty input yields
         an empty list.
     """
     import json  # local import so the fix does not depend on file-level imports

     if isinstance(texts, str):
         if not texts.strip():
             return []
         try:
             decoded = json.loads(texts)
         except json.JSONDecodeError:
             # Not JSON at all: fall back to embedding the raw text itself.
             decoded = texts
         if isinstance(decoded, list):
             texts = [str(item) for item in decoded]
         elif isinstance(decoded, str):
             texts = [decoded]
         else:
             # Valid JSON but not an array/string (e.g. a number or object):
             # embed the text exactly as the user typed it.
             texts = [texts]

     # Nothing to embed; skip the tokenizer/model call entirely.
     if len(texts) == 0:
         return []

     inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
     outputs = model(**inputs)
     vectors = outputs.pooler_output.detach().numpy()
     # Serialize each vector as a comma-separated string next to its sentence.
     return [
         {"sentence": sentence, "vector": ", ".join(map(str, vector))}
         for sentence, vector in zip(texts, vectors)
     ]
 # Assemble the web UI: a text box feeds text_to_vector and the returned
 # sentence/vector objects are rendered in a JSON viewer.
 demo = gr.Interface(
     title="Batch Text to Vector 769 dim",
     description="This demo converts an array of sentences to vectors and returns objects with sentence and vector.",
     fn=text_to_vector,
     inputs=gr.Textbox(
         label="Enter JSON array",
         placeholder="Enter an array of sentences as a JSON string",
     ),
     outputs=gr.JSON(label="Sentence and Vector Pairs"),
 )

 # Start serving the interface.
 demo.launch()