hot-ones-trivia / core.py
RedTachyon's picture
Initial commit from GitHub repository without history
31b6e27
raw
history blame contribute delete
852 Bytes
import dataclasses
import numpy as np
from openai import OpenAI
def get_batch_embeddings(
    client: OpenAI, texts: list[str], model: str = "text-embedding-3-small"
) -> np.ndarray:
    """Embed several texts with a single embeddings request.

    Args:
        client: OpenAI client used to issue the request.
        texts: Strings to embed; sent to the API together as one batch.
        model: Name of the embedding model to request.

    Returns:
        Array built from the ``embedding`` of every item in the response's
        ``data`` list, in the order the response lists them. Presumably
        that is one row per input text, in input order -- TODO confirm
        against the API contract.
    """
    response = client.embeddings.create(input=texts, model=model)
    # Walk the response items directly rather than indexing by position.
    return np.array([item.embedding for item in response.data])
def get_one_embedding(
    client: OpenAI, text: str, model="text-embedding-3-small"
) -> np.ndarray:
    """Embed a single text and return its vector as a NumPy array.

    Args:
        client: OpenAI client used to issue the request.
        text: The string to embed; it is sent as a one-element batch.
        model: Name of the embedding model to request.

    Returns:
        Array built from the ``embedding`` of the first item in the
        response's ``data`` list.
    """
    response = client.embeddings.create(input=[text], model=model)
    # Only one input was sent, so only the first result is read.
    first_item = response.data[0]
    vector = first_item.embedding
    return np.array(vector)
@dataclasses.dataclass
class Chunk:
    """One piece of text together with metadata about where it came from.

    The field meanings below are inferred from their names only; nothing
    in this class validates or interprets them.
    """

    # The chunk's text content.
    text: str
    # Presumably the title of the source video -- TODO confirm with caller.
    title: str
    # Presumably the index of the source video in some list -- TODO confirm.
    video_idx: int
    # Presumably a URL pointing at the source -- TODO confirm.
    link: str
@dataclasses.dataclass
class Dataset:
    """A list of chunks bundled with an array of embeddings.

    The class itself does not check that the two fields agree in length;
    presumably ``embeddings`` holds one row per chunk -- TODO confirm
    against the code that builds the dataset.
    """

    # The text chunks making up the dataset.
    chunks: list[Chunk]
    # Embedding array stored alongside the chunks.
    embeddings: np.ndarray

    def __len__(self):
        """Return the number of chunks (the embeddings are not consulted)."""
        chunk_count = len(self.chunks)
        return chunk_count