# shared_resources.py
import torch | |
# from transformers import AutoTokenizer, AutoModelForCausalLM | |
from sentence_transformers import SentenceTransformer | |
from datasets import load_dataset | |
# from transformers import AutoProcessor, MusicgenForConditionalGeneration | |
import re | |
class SharedResources:
    """Load and hold the compute device, embedding model and indexed dataset.

    Attributes set by ``__init__``:
        device: ``torch.device("cuda")`` when ``torch.cuda.is_available()``
            is true, otherwise ``torch.device("cpu")``.
        sentence_transformer: ``SentenceTransformer`` built from
            ``mixedbread-ai/mxbai-embed-large-v1``.
        dataset: result of ``load_dataset`` for
            ``subashdvorak/tiktok-story-data3`` at revision ``embedded``.
        data: the ``"train"`` entry of ``dataset`` after
            ``add_faiss_index("embeddings")`` has been called on it.
    """

    def __init__(self):
        # Prefer the GPU whenever torch reports one; otherwise use the CPU.
        has_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if has_cuda else "cpu")

        # Sentence-embedding model.
        # NOTE(review): self.device is not passed to the model here, so its
        # placement is whatever the library chooses by default — confirm
        # that this is intended.
        self.sentence_transformer = SentenceTransformer(
            "mixedbread-ai/mxbai-embed-large-v1"
        )

        # Dataset pinned to the "embedded" revision.
        self.dataset = load_dataset(
            "subashdvorak/tiktok-story-data3",
            revision="embedded",
        )

        # Keep the value returned by add_faiss_index, as the original did.
        train_split = self.dataset["train"]
        self.data = train_split.add_faiss_index("embeddings")
# Module-level instance, constructed once when this module is first imported.
# Constructing it runs SharedResources.__init__ immediately.
shared_resources = SharedResources()