|
import gradio as gr |
|
from transformers import AutoModel, AutoTokenizer |
|
import numpy as np |
|
import json |
|
|
|
|
|
# Identifier handed to from_pretrained() for both the tokenizer and the model,
# so the two are always loaded from the same checkpoint.
model_name = "Supabase/gte-small"

# Loaded once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
|
|
|
def text_to_vector(texts_json):
    """Embed a JSON-encoded array of strings and return the vectors as JSON.

    Args:
        texts_json: JSON text whose top-level value is an array of strings,
            e.g. '["first sentence", "second sentence"]'.

    Returns:
        A JSON string containing the nested list produced from the model's
        ``pooler_output`` for the batch. An empty input array yields "[]"
        without invoking the tokenizer or the model.

    Raises:
        ValueError: If ``texts_json`` cannot be decoded as JSON, or if the
            decoded value is not a list made up solely of strings.
    """
    # Keep the try body to the single call that can fail while decoding.
    # TypeError covers a non-text argument such as None.
    try:
        texts = json.loads(texts_json)
    except (json.JSONDecodeError, TypeError) as err:
        raise ValueError("Invalid JSON format.") from err

    # Validate the elements too: nested lists or non-string items would
    # otherwise reach the tokenizer and be misinterpreted or fail there.
    is_string_list = isinstance(texts, list) and all(
        isinstance(item, str) for item in texts
    )
    if not is_string_list:
        raise ValueError("Input must be a JSON array of strings.")

    # Nothing to embed; avoid sending an empty batch through the model.
    if not texts:
        return json.dumps([])

    # Imported here because the file's top-level imports do not include torch;
    # return_tensors="pt" below already requires it to be installed.
    import torch

    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    # NOTE(review): pooler_output is kept so returned values do not change.
    # GTE-style models are usually documented with mean pooling over
    # last_hidden_state -- TODO confirm which embedding consumers expect.
    vectors = outputs.pooler_output.cpu().numpy().tolist()
    return json.dumps(vectors)
|
|
|
|
|
|
|
|
|
|
|
|
|
# Input component: a single textbox that receives the JSON array as text.
input_box = gr.Textbox(
    label="Enter JSON array",
    placeholder="Enter an array of sentences as a JSON string",
)

# Output component: a multi-line textbox showing the JSON-encoded vectors.
output_box = gr.Textbox(label="Text Vectors (JSON)", lines=10)

# Wire text_to_vector to the two textboxes.
demo = gr.Interface(
    fn=text_to_vector,
    inputs=input_box,
    outputs=output_box,
    title="Batch Text to Vector",
    description="This demo converts an array of sentences to vectors and returns them as a JSON array.",
)

demo.launch()
|
|