File size: 864 Bytes
509759d 6b2b518 509759d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# shared_resources.py
import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
# from transformers import AutoProcessor, MusicgenForConditionalGeneration
import re
class SharedResources:
    """Bundle of the heavyweight objects this module loads up front.

    Attributes set by ``__init__``:
        device: ``torch.device("cuda")`` when ``torch.cuda.is_available()``
            is true, otherwise ``torch.device("cpu")``.
        sentence_transformer: ``SentenceTransformer`` built from
            "mixedbread-ai/mxbai-embed-large-v1".
        dataset: result of ``load_dataset`` for
            "subashdvorak/tiktok-story-data3" at revision "embedded".
        data: value returned by ``add_faiss_index("embeddings")`` called on
            the "train" split of ``dataset``.
    """

    def __init__(self):
        """Select the device, then load the embedding model and the dataset."""
        # Recorded on the instance; it is not passed to the model or dataset
        # loaders in this constructor.
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)

        # Embedding model.
        self.sentence_transformer = SentenceTransformer(
            "mixedbread-ai/mxbai-embed-large-v1"
        )

        # Dataset at the "embedded" revision; the index below is built on
        # its "embeddings" column.
        self.dataset = load_dataset(
            "subashdvorak/tiktok-story-data3",
            revision="embedded",
        )

        # Build the FAISS index over the "embeddings" column of the train split.
        train_split = self.dataset["train"]
        self.data = train_split.add_faiss_index("embeddings")
# Create a single module-level instance of SharedResources.
# NOTE: this statement runs when the module is imported, so the import itself
# triggers the model and dataset loading done in SharedResources.__init__.
shared_resources = SharedResources()
|