import base64

import streamlit as st
import torch
from langchain.prompts import PromptTemplate
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Load the LoRA adapter config from the current directory; it records which
# base model the adapter was trained on.
peft_model_id = "./"
config = PeftConfig.from_pretrained(peft_model_id)

# 4-bit NF4 quantization with double quantization and bfloat16 compute.
# llm_int8_enable_fp32_cpu_offload lets the modules placed on the CPU
# (see device_map below) stay in fp32 instead of being quantized.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_enable_fp32_cpu_offload=True,
)

# Keep every major submodule on the CPU; map individual modules to "cuda:0"
# instead if a GPU with enough memory is available.
device_map = {
    "transformer.word_embeddings": "cpu",
    "transformer.word_embeddings_layernorm": "cpu",
    "lm_head": "cpu",
    "transformer.h": "cpu",
    "transformer.ln_f": "cpu",
}

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=quantization_config,
    device_map=device_map,
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned LoRA adapter to the quantized base model.
model = PeftModel.from_pretrained(model, peft_model_id)
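# Example record consumed by format_text_to_prompt below (a sketch; the
# values are illustrative, but the keys match what the function reads):
#   {"prompt": "Which gas do plants absorb during photosynthesis?",
#    "A": "Oxygen", "B": "Carbon dioxide", "C": "Nitrogen",
#    "D": "Hydrogen", "E": "Methane"}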
prompt_template = """Answer the following multiple choice question by giving the most appropriate response. \
Answer should be one among [A, B, C, D, E], in order of the most likely to be correct to the least likely to be correct.
Question: {prompt}
A) {a}
B) {b}
C) {c}
D) {d}
E) {e}
Answer: """

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=['prompt', 'a', 'b', 'c', 'd', 'e'],
)


def format_text_to_prompt(example):
    # Fill the template with one question and its five answer options.
    ans = prompt.format(prompt=example['prompt'],
                        a=example['A'],
                        b=example['B'],
                        c=example['C'],
                        d=example['D'],
                        e=example['E'])
    return {"ans": ans}


def get_ans(text):
    # Score only the next token after "Answer:" and rank the five option
    # tokens by their logits instead of generating free-form text.
    # Inputs go to the model's own device (the device_map above keeps the
    # model on CPU, so hard-coding .cuda() here would fail).
    inputs = tokenizer(text, return_tensors='pt').to(model.device)
    with torch.no_grad():
        logits = model(input_ids=inputs['input_ids'],
                       attention_mask=inputs['attention_mask']).logits[0, -1]

    # (logit, option) pairs for the tokens " A" ... " E".
    options_list = [(logits[tokenizer(' A').input_ids[-1]].item(), 'A'),
                    (logits[tokenizer(' B').input_ids[-1]].item(), 'B'),
                    (logits[tokenizer(' C').input_ids[-1]].item(), 'C'),
                    (logits[tokenizer(' D').input_ids[-1]].item(), 'D'),
                    (logits[tokenizer(' E').input_ids[-1]].item(), 'E')]
    options_list = sorted(options_list, reverse=True)

    # Return the three highest-scoring options, most likely first.
    return [option for _, option in options_list[:3]]


def get_base64_of_bin_file(bin_file):
    with open(bin_file, 'rb') as f:
        data = f.read()
    return base64.b64encode(data).decode()


def set_png_as_page_bg(png_file):
    # The original CSS block was lost; this is a standard Streamlit
    # background-image snippet reconstructed from the surrounding code.
    img = get_base64_of_bin_file(png_file)
    page_bg_img = f"""
    <style>
    .stApp {{
        background-image: url("data:image/png;base64,{img}");
        background-size: cover;
    }}
    </style>
    """
    st.markdown(page_bg_img, unsafe_allow_html=True)


def get_base64_encoded_image(image_path):
    with open(image_path, "rb") as img_file:
        encoded_string = base64.b64encode(img_file.read()).decode("utf-8")
    return encoded_string


def main():
    set_png_as_page_bg("net_technology_5407.jpg")
    image_path = "artificial-intelligence.jpg"  # Replace with the actual image file path
    st.title("Sci-mcq-GPT")

    link = "https://drive.google.com/file/d/1_2TqNNyoczhxIBmU7BpOzEi2bu3MC-sx/view?usp=sharing"
    icon_path = "pdf download logo.png"
    encoded_image = get_base64_encoded_image(icon_path)
    # The original anchor markup was lost; this reconstruction links the PDF
    # icon in the sidebar to the Google Drive file above.
    lnk = (f'<a href="{link}" target="_blank">'
           f'<img src="data:image/png;base64,{encoded_image}" width="50"></a>')
    col = st.sidebar
    col.markdown(lnk, unsafe_allow_html=True)

    st.subheader("Ask Q&A")
    col1, col2 = st.columns(2)
    query = col1.text_area("Enter your question")
    if col1.button("Get Answer"):
        ans = get_ans(query)
        print(ans)
        # get_ans returns a list of options; join it for the text widget.
        col2.text_area("Sci-mcq-GPT Response", " ".join(ans))
    else:
        col2.text_area("Sci-mcq-GPT Response", value="")

    col_sidebar = st.sidebar
    col_sidebar.image(image_path, caption=" ", width=300)


if __name__ == "__main__":
    main()
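# Usage sketch (assumes this file is saved as app.py, the LoRA adapter files
# — e.g. adapter_config.json and adapter_model.bin — sit next to it, and the
# three image assets referenced above are in the working directory):
#   streamlit run app.py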