Lahiru Menikdiwela
Changes made for the Llama model
173b5f1
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_openai import OpenAI
# from huggingface_hub import login
from dotenv import load_dotenv
from logging import getLogger
# import streamlit as st
import torch
load_dotenv()
hf_token = os.environ.get("HF_TOKEN")
# # hf_token = st.secrets["HF_TOKEN"]
# login(token=hf_token)
logger = getLogger(__name__)
device = "cuda" if torch.cuda.is_available() else "cpu"
def get_local_model(model_name_or_path: str) -> AutoModelForCausalLM:
    """Load a local Hugging Face causal LM (e.g. a Llama checkpoint)."""
    # The separate tokenizer load and the summarization pipeline below were
    # removed for the Llama model; the raw model is returned instead.
    # tokenizer = AutoTokenizer.from_pretrained(
    #     model_name_or_path,
    #     token=hf_token,
    # )
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.bfloat16,
        # load_in_4bit=True,
        token=hf_token,
    )
    # Place the model on the selected device (the removed pipeline used to do this).
    model.to(device)
    # pipe = pipeline(
    #     task="summarization",
    #     model=model,
    #     tokenizer=tokenizer,
    #     device=device,
    #     max_new_tokens=400,
    #     model_kwargs={"max_length": 16384, "max_new_tokens": 512},
    # )
    logger.info(f"Causal LM loaded to {device}")
    return model
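
# --- Hedged usage sketch (illustrative; not part of the original module) ---
# With the summarization pipeline removed for the Llama model, the caller is
# expected to drive generation itself. A minimal sketch of that, assuming the
# tokenizer for the same checkpoint is loaded separately; the prompt wording
# and max_new_tokens value are placeholders, not this project's settings.
def summarize_with_llama(model: AutoModelForCausalLM, model_name_or_path: str, text: str) -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, token=hf_token)
    prompt = f"Summarize the following text:\n\n{text}\n\nSummary:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=400)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)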

def get_endpoint(api_key: str):
    """Return a LangChain OpenAI LLM wrapper for the hosted endpoint."""
    llm = OpenAI(openai_api_key=api_key)
    return llm

def get_model(model_type: str, model_name_or_path: str, api_key=None):
    """Return either the hosted OpenAI endpoint or a local Hugging Face model."""
    if model_type == "openai":
        return get_endpoint(api_key)
    else:
        return get_local_model(model_name_or_path)
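
# --- Hedged usage sketch (illustrative; model name and env var are assumptions) ---
# How get_model might be called for each backend; "meta-llama/Llama-3.1-8B-Instruct"
# and OPENAI_API_KEY are placeholders, not values taken from this repository.
if __name__ == "__main__":
    # Any model_type other than "openai" falls through to the local loader.
    local_llm = get_model(
        model_type="local",
        model_name_or_path="meta-llama/Llama-3.1-8B-Instruct",
    )

    # Hosted OpenAI endpoint via the LangChain wrapper.
    openai_llm = get_model(
        model_type="openai",
        model_name_or_path=None,
        api_key=os.environ.get("OPENAI_API_KEY"),
    )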