File size: 852 Bytes
31b6e27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import dataclasses

import numpy as np
from openai import OpenAI


def get_batch_embeddings(
    client: OpenAI, texts: list[str], model="text-embedding-3-small"
) -> np.ndarray:
    """Embed several texts with a single embeddings request.

    Args:
        client: OpenAI client whose ``embeddings.create`` endpoint is called.
        texts: Strings to embed; sent to the API together as one ``input``.
        model: Name of the embedding model to request.

    Returns:
        Array built from the ``embedding`` of every item in the response's
        ``data`` list, in the order the response lists them.
    """
    response = client.embeddings.create(input=texts, model=model)
    # Walk the response items directly instead of indexing by position.
    return np.array([item.embedding for item in response.data])


def get_one_embedding(
    client: OpenAI, text: str, model="text-embedding-3-small"
) -> np.ndarray:
    """Embed a single text and return its vector as a NumPy array.

    The text is wrapped in a one-element list for the request, and the
    embedding of the first item in the response's ``data`` is returned.

    Args:
        client: OpenAI client whose ``embeddings.create`` endpoint is called.
        text: The string to embed.
        model: Name of the embedding model to request.

    Returns:
        Array built from the first response item's ``embedding``.
    """
    response = client.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    return np.array(first_item.embedding)


@dataclasses.dataclass
class Chunk:
    """A piece of text bundled with metadata fields.

    NOTE(review): nothing in this file populates these fields, so the
    per-field notes below are inferred from the names only; confirm them
    against the code that builds chunks.
    """

    # The chunk's text content.
    text: str
    # presumably the title of the source this chunk came from (confirm)
    title: str
    # presumably an index identifying the source video (confirm)
    video_idx: int
    # presumably a URL pointing at the source (confirm)
    link: str

@dataclasses.dataclass
class Dataset:
    """A list of chunks stored alongside an embeddings array.

    NOTE(review): presumably ``embeddings`` holds one entry per chunk, but
    that is not checked here; confirm against the code that builds it.
    """

    chunks: list[Chunk]
    embeddings: np.ndarray

    def __len__(self):
        """Return the number of chunks; ``embeddings`` is not consulted."""
        chunk_count = len(self.chunks)
        return chunk_count