import os
from logging import getLogger
from typing import Optional

import torch
from dotenv import load_dotenv
from langchain_openai import OpenAI
from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, pipeline
# from huggingface_hub import login
# import streamlit as st

load_dotenv()
hf_token = os.environ.get("HF_TOKEN")
# hf_token = st.secrets["HF_TOKEN"]
# login(token=hf_token)

logger = getLogger(__name__)
device = "cuda" if torch.cuda.is_available() else "cpu"


def get_local_model(model_name_or_path: str) -> PreTrainedModel:
    """Load a local Hugging Face causal LM and move it to the available device."""
    # The tokenizer loading and the summarization pipeline wrapper below were
    # removed when switching to the Llama model; the raw model is returned instead.
    # tokenizer = AutoTokenizer.from_pretrained(
    #     model_name_or_path,
    #     token=hf_token,
    # )
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.bfloat16,
        # load_in_4bit=True,
        token=hf_token,
    )
    model = model.to(device)

    # pipe = pipeline(
    #     task="summarization",
    #     model=model,
    #     tokenizer=tokenizer,
    #     device=device,
    #     max_new_tokens=400,
    #     model_kwargs={"max_length": 16384, "max_new_tokens": 512},
    # )
    logger.info(f"Causal LM loaded on {device}")
    return model


def get_endpoint(api_key: str) -> OpenAI:
    """Return a LangChain wrapper around the hosted OpenAI completions endpoint."""
    llm = OpenAI(openai_api_key=api_key)
    return llm


def get_model(model_type: str, model_name_or_path: str, api_key: Optional[str] = None):
    """Dispatch to the OpenAI endpoint or to a locally loaded Hugging Face model."""
    if model_type == "openai":
        return get_endpoint(api_key)
    else:
        return get_local_model(model_name_or_path)
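

# --- Example usage (illustrative sketch only) ---
# Shows how a caller might load a local model via get_model and run a quick
# generation. The model id below is a placeholder, not necessarily the
# checkpoint this project uses; HF_TOKEN must be set in the environment / .env
# and grant access to whichever checkpoint is substituted in.
if __name__ == "__main__":
    checkpoint = "meta-llama/Llama-2-7b-chat-hf"  # placeholder model id
    model = get_model(model_type="local", model_name_or_path=checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, token=hf_token)

    prompt = "Summarize: The quick brown fox jumps over the lazy dog."
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(**inputs, max_new_tokens=64)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))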