Spaces:
Sleeping
Sleeping
File size: 852 Bytes
31b6e27 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import dataclasses
import numpy as np
from openai import OpenAI
def get_batch_embeddings(
    client: OpenAI, texts: list[str], model: str = "text-embedding-3-small"
) -> np.ndarray:
    """Embed several texts with one request and return them as an array.

    Args:
        client: Client whose ``embeddings.create`` endpoint is called.
        texts: Strings to embed; passed unchanged as the request ``input``.
        model: Name of the embedding model to request.

    Returns:
        An array built from the ``embedding`` of every item in the
        response's ``data``, kept in the order the response lists them.
        NOTE(review): presumably one row per input text, in input order --
        confirm against the embeddings API documentation.
    """
    response = client.embeddings.create(input=texts, model=model)
    # Iterate over the items directly instead of indexing by position.
    return np.array([item.embedding for item in response.data])
def get_one_embedding(
    client: OpenAI, text: str, model="text-embedding-3-small"
) -> np.ndarray:
    """Embed a single string and return its vector as a NumPy array.

    The text is sent to ``client.embeddings.create`` wrapped in a
    one-element list; the ``embedding`` of the first item in the
    response's ``data`` is converted with ``np.array`` and returned.
    """
    response = client.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    vector = np.array(first_item.embedding)
    return vector
@dataclasses.dataclass
class Chunk:
    """One piece of text together with metadata about its origin.

    NOTE(review): the field meanings noted below are inferred from the
    field names only; confirm against the code that constructs chunks.
    """

    text: str  # the chunk's text content
    title: str  # presumably the title of the source video -- TODO confirm
    video_idx: int  # presumably the index of the source video -- TODO confirm
    link: str  # presumably a URL pointing at the source -- TODO confirm
@dataclasses.dataclass
class Dataset:
    """A list of chunks bundled with an array of embeddings.

    ``len(dataset)`` is the number of chunks.

    NOTE(review): presumably ``embeddings`` holds one row per chunk, in the
    same order as ``chunks`` -- nothing here enforces that; confirm with the
    code that builds the dataset.
    """

    chunks: list[Chunk]
    embeddings: np.ndarray

    def __len__(self) -> int:
        """Return the number of chunks."""
        return len(self.chunks)

    def __eq__(self, other: object) -> bool:
        """Compare chunks by value and embeddings element-wise.

        The dataclass-generated ``__eq__`` compares the fields as a tuple,
        which asks for the truth value of ``ndarray == ndarray`` and raises
        ``ValueError`` for arrays with more than one element. Comparing the
        arrays with ``np.array_equal`` avoids that.
        """
        # Same class check as the generated dataclass comparison.
        if other.__class__ is not self.__class__:
            return NotImplemented
        if self.chunks != other.chunks:
            return False
        return bool(np.array_equal(self.embeddings, other.embeddings))
|