import numpy as np
import pandas as pd
from string import Template
import streamlit as st
import base64
from datasets import load_dataset
from datasets import Dataset
import torch
from tqdm import tqdm
from peft import LoraConfig, get_peft_model
import transformers
# from transformers import AutoModelForCausalLM, AdapterConfig
from transformers import AutoConfig,AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
from transformers import TrainingArguments
from peft import LoraConfig
from peft import *
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from langchain.prompts import PromptTemplate
from IPython.display import Markdown, display
# --- Model/adapter bootstrap (runs at import time) ---
# Load the PEFT (LoRA) adapter config from the current working directory
# (expects adapter_config.json + adapter weights alongside this script).
peft_model_id = "./"
config = PeftConfig.from_pretrained(peft_model_id)
# 4-bit NF4 quantization with double quantization and bfloat16 compute,
# plus fp32 CPU offload for modules pinned to "cpu" in device_map below.
# NOTE(review): `load_in_8bit_fp32_cpu_offload` and `bnb_8bit_use_fp16` are
# not documented BitsAndBytesConfig parameters — confirm the installed
# transformers version does not just silently ignore them.
quantization_config = BitsAndBytesConfig(
llm_int8_enable_fp32_cpu_offload=True, # Enable offloading to CPU in float32 precision
load_in_8bit_fp32_cpu_offload=True,
bnb_8bit_use_fp16=False,
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16
)
# Pin every major submodule to the CPU; the module names suggest a
# Falcon/Bloom-style architecture ("transformer.h", "transformer.ln_f").
# With the offload flag above, these layers run in fp32 on the host.
device_map = {
"transformer.word_embeddings": "cpu",
"transformer.word_embeddings_layernorm": "cpu",
"lm_head": "cpu",
"transformer.h": "cpu",
"transformer.ln_f": "cpu",
}
# Load the quantized base checkpoint named in the adapter config.
# trust_remote_code=True executes model code shipped with the checkpoint —
# only safe for trusted model repos.
model = AutoModelForCausalLM.from_pretrained(
config.base_model_name_or_path,
return_dict=True,
quantization_config=quantization_config,
device_map=device_map,
trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# The base model defines no pad token; reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token
# Attach the LoRA adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(model, peft_model_id)
# Prompt template for a five-option MCQ. {prompt} is the question text and
# {a}..{e} are the option bodies; the model is expected to continue after
# "Answer: " with an option letter (scored in get_ans below).
# NOTE(review): the stray apostrophe at the end of the second sentence and
# the trailing backslash line-continuations are part of the literal prompt
# the model was presumably tuned on — do not "clean up" without re-testing.
prompt_template = """Answer the following multiple choice question by giving the most appropriate response. Answer should be one among [A, B, C, D, E] \
in order of the most likely to be correct to the least likely to be correct.'
Question: {prompt}\n
A) {a}\n
B) {b}\n
C) {c}\n
D) {d}\n
E) {e}\n
Answer: """
# LangChain template object used by format_text_to_prompt().
prompt = PromptTemplate(template=prompt_template, input_variables=['prompt', 'a', 'b', 'c', 'd', 'e'])
def format_text_to_prompt(example):
    """Render one dataset row through the module-level MCQ `prompt` template.

    Expects *example* to carry the question under 'prompt' and the five
    option texts under keys 'A'..'E'; returns {"ans": <formatted string>}.
    """
    option_fields = {letter.lower(): example[letter] for letter in "ABCDE"}
    rendered = prompt.format(prompt=example['prompt'], **option_fields)
    return {"ans": rendered}
def get_ans(text, top_k=3):
    """Score the answer options A-E for *text* and return the best *top_k*.

    Uses the module-level `model` and `tokenizer`. The next-token logits at
    the final position are read for each single token ' A'..' E', and the
    option letters are returned sorted by descending logit.

    Parameters
    ----------
    text : str
        A fully formatted MCQ prompt (ending with "Answer: ").
    top_k : int, optional
        Number of option letters to return, most likely first. Defaults to 3
        (the original hard-coded behavior).

    Returns
    -------
    list[str]
        The *top_k* option letters, most likely first.
    """
    inputs = tokenizer(text, return_tensors='pt')
    # Send inputs to wherever the model actually lives instead of the
    # original hard-coded .cuda() — the device_map above pins modules to CPU,
    # so .cuda() would crash on GPU-less hosts and mismatch the model.
    device = next(model.parameters()).device
    with torch.no_grad():  # inference only — skip building the autograd graph
        logits = model(
            input_ids=inputs['input_ids'].to(device),
            attention_mask=inputs['attention_mask'].to(device),
        ).logits[0, -1]
    # Score each option by the logit of its leading-space token; .item()
    # converts to float so sorting never compares tensor objects.
    scored = [
        (logits[tokenizer(' ' + option).input_ids[-1]].item(), option)
        for option in ('A', 'B', 'C', 'D', 'E')
    ]
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [option for _, option in scored[:top_k]]
def get_base64_of_bin_file(bin_file):
    """Return the contents of *bin_file* encoded as a base64 ASCII string."""
    with open(bin_file, 'rb') as handle:
        return base64.b64encode(handle.read()).decode()
def set_png_as_page_bg(png_file):
    """Set *png_file* as the full-page Streamlit background.

    The image is inlined as a base64 data URI inside a <style> block, so no
    static file hosting is required.

    NOTE(review): in the original code the f-string body was empty, so the
    encoded image was computed but never used and no background was applied.
    The CSS below restores the standard Streamlit background-image snippet,
    which is almost certainly what was intended — confirm visually.
    """
    img = get_base64_of_bin_file(png_file)
    page_bg_img = f"""
    <style>
    .stApp {{
        background-image: url("data:image/png;base64,{img}");
        background-size: cover;
        background-repeat: no-repeat;
        background-attachment: fixed;
    }}
    </style>
    """
    st.markdown(page_bg_img, unsafe_allow_html=True)
def get_base64_encoded_image(image_path):
    """Base64-encode the file at *image_path* and return it as UTF-8 text."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def main():
    """Render the Streamlit page: background, title, sidebar, and Q&A form."""
    set_png_as_page_bg("net_technology_5407.jpg")
    sidebar_image = "artificial-intelligence.jpg"  # Replace with the actual image file path
    st.title("Sci-mcq-GPT")
    # NOTE(review): `link` and the encoded icon are computed but never used —
    # the sidebar markdown below is an empty string, so the download-link
    # HTML appears to have been stripped from this source.
    link = "https://drive.google.com/file/d/1_2TqNNyoczhxIBmU7BpOzEi2bu3MC-sx/view?usp=sharing"
    icon_path = "pdf download logo.png"
    encoded_image = get_base64_encoded_image(icon_path)
    sidebar = st.sidebar
    sidebar.markdown('', unsafe_allow_html=True)

    st.subheader("Ask Q&A")
    question_col, answer_col = st.columns(2)
    query = question_col.text_area("Enter your question")
    if question_col.button("Get Answer"):
        answer = get_ans(query)
        print(answer)
        answer_col.text_area("Sci-mcq-GPT Response", answer)
    else:
        answer_col.text_area("Sci-mcq-GPT Response", value="")

    sidebar.image(sidebar_image, caption=" ", width=300)


if __name__ == "__main__":
    main()