import os
import re
from typing import List, Optional

import openai
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
class LLM_Middleware:
    hf_key: str
    dataset = None

    def __init__(self, openai_key, hf) -> None:
        # Configure the OpenAI client and keep the Hugging Face token for later calls.
        openai.api_key = openai_key
        self.hf_key = hf
    def loadDataset(self, datasetName: str):
        '''
        Load a dataset from the Hugging Face Hub using the datasets library.
        '''
        self.dataset = load_dataset(datasetName)
        return self.dataset
    def TokenizerFunction(self, modelName: str, dataset):
        '''
        Tokenize the "text" field of a dataset split with the given model's tokenizer.
        '''
        tokenizer = AutoTokenizer.from_pretrained(modelName)
        # Pad every example to the model's maximum length and truncate longer ones.
        return tokenizer(dataset["text"], padding="max_length", truncation=True)
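

# A minimal usage sketch, not part of the original module. The environment
# variable names, the "imdb" dataset, and the "bert-base-uncased" checkpoint
# (picked because its tokenizer ships with a padding token) are assumptions
# made purely for illustration.
if __name__ == "__main__":
    middleware = LLM_Middleware(os.environ["OPENAI_API_KEY"], os.environ["HF_TOKEN"])
    data = middleware.loadDataset("imdb")
    # Tokenize a small slice of the training split to keep the demo fast.
    encoded = middleware.TokenizerFunction("bert-base-uncased", data["train"][:8])
    print(len(encoded["input_ids"]), "examples tokenized")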