# Spaces: Sleeping  (page-status residue from the web scrape; not part of the module)
import dataclasses

import numpy as np
from openai import OpenAI
def get_batch_embeddings(
    client: OpenAI, texts: list[str], model: str = "text-embedding-3-small"
) -> np.ndarray:
    """Embed several texts with a single embeddings request.

    Args:
        client: OpenAI client used to issue the request.
        texts: Strings to embed; sent to the API together as one ``input``.
        model: Name of the embedding model to request.

    Returns:
        An array built from the response's ``data`` entries, one row per
        entry, in the order the response lists them (2-D as long as every
        returned vector has the same length).
    """
    response = client.embeddings.create(input=texts, model=model)
    # Walk the returned items directly rather than indexing by position.
    return np.array([item.embedding for item in response.data])
def get_one_embedding(
    client: OpenAI, text: str, model: str = "text-embedding-3-small"
) -> np.ndarray:
    """Embed a single text and return its vector.

    Args:
        client: OpenAI client used to issue the request.
        text: The string to embed.
        model: Name of the embedding model to request.

    Returns:
        A 1-D array holding the first (and only requested) embedding from
        the response.
    """
    # The text is wrapped in a one-element list, so the vector we want is
    # the first entry of the response's ``data``.
    response = client.embeddings.create(input=[text], model=model)
    return np.array(response.data[0].embedding)
@dataclasses.dataclass
class Chunk:
    """One piece of text together with metadata about where it came from."""

    # The chunk's text content.
    text: str
    # Presumably the title of the source video -- TODO confirm with the loader.
    title: str
    # Presumably the index of the source video in some collection -- TODO confirm.
    video_idx: int
    # Presumably a URL pointing at the source -- TODO confirm.
    link: str
@dataclasses.dataclass
class Dataset:
    """A list of chunks stored alongside an array of embeddings."""

    # The chunks in this dataset; ``len()`` of the dataset counts these.
    chunks: list[Chunk]
    # Presumably one embedding row per chunk, in the same order -- TODO confirm.
    embeddings: np.ndarray

    def __len__(self) -> int:
        """Return the number of chunks held by the dataset."""
        return len(self.chunks)