Adwitiya_quizbot_2.0

Running

App Files Files Community

Adwitiya_quizbot_2.0 / app.py

NCTCMumbai

Update app.py

24d1627 verified 5 months ago

raw

history blame

18.6 kB


	# ---------------- Previous implementation (entirely commented out) ----------------
	# import pandas as pd
	# import json
	# import gradio as gr
	# from pathlib import Path
	# from ragatouille import RAGPretrainedModel
	# from gradio_client import Client
	# from jinja2 import Environment, FileSystemLoader
	# from tempfile import NamedTemporaryFile

	# VECTOR_COLUMN_NAME = "vector"
	# TEXT_COLUMN_NAME = "text"
	# #proj_dir = Path(__file__).parent
	# proj_dir = Path.cwd()
	# # Setting up the logging
	# import logging
	# logging.basicConfig(level=logging.INFO)
	# logger = logging.getLogger(__name__)

	# # Replace Mixtral client with Qwen Client
	# client = Client("Qwen/Qwen1.5-110B-Chat-demo")

	# # Set up the template environment with the templates directory
	# env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))

	# # Load the templates directly from the environment
	# template = env.get_template('template.j2')
	# template_html = env.get_template('template_html.j2')

	# def system_instructions(question_difficulty, topic, documents_str):
	# return f"""<s> [INST] You are a great teacher and your task is to create 10 questions with 4 choices with {question_difficulty} difficulty about the topic request "{topic}" only from the below given documents, {documents_str}. Then create answers. Index in JSON format, the questions as "Q#":"" to "Q#":"", the four choices as "Q#:C1":"" to "Q#:C4":"", and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". example is 'A10':'Q10:C3' [/INST]"""

	# # RA
	# RAG_db = gr.State()

	# def json_to_excel(output_json):
	# # Initialize list for DataFrame
	# data = []
	# gr.Warning('Generating Shareable file link..',duration=30)
	# for i in range(1, 11): # Assuming there are 10 questions
	# question_key = f"Q{i}"
	# answer_key = f"A{i}"

	# question = output_json.get(question_key, '')
	# correct_answer_key = output_json.get(answer_key, '')
	# correct_answer = correct_answer_key.split(':')[-1] if correct_answer_key else ''

	# # Extract options
	# option_keys = [f"{question_key}:C{i}" for i in range(1, 6)]
	# options = [output_json.get(key, '') for key in option_keys]

	# # Add data row
	# data.append([
	# question, # Question Text
	# "Multiple Choice", # Question Type
	# options[0], # Option 1
	# options[1], # Option 2
	# options[2] if len(options) > 2 else '', # Option 3
	# options[3] if len(options) > 3 else '', # Option 4
	# options[4] if len(options) > 4 else '', # Option 5
	# correct_answer, # Correct Answer
	# 30, # Time in seconds
	# '' # Image Link
	# ])

	# # Create DataFrame
	# df = pd.DataFrame(data, columns=[
	# "Question Text",
	# "Question Type",
	# "Option 1",
	# "Option 2",
	# "Option 3",
	# "Option 4",
	# "Option 5",
	# "Correct Answer",
	# "Time in seconds",
	# "Image Link"
	# ])
	# # Create a temporary file and save the DataFrame to it
	# temp_file = NamedTemporaryFile(delete=False, suffix=".xlsx")
	# df.to_excel(temp_file.name, index=False)

	# # # Save to Excel file
	# # excel_path = proj_dir / "quiz_questions.xlsx"
	# # df.to_excel(str(excel_path), index=False)
	# return temp_file.name

	# with gr.Blocks(title="Quiz Maker", theme=gr.themes.Default(primary_hue="green", secondary_hue="green"), css="style.css") as QUIZBOT:
	# def load_model():
	# RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
	# RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
	# return 'Ready to Go!!'

	# with gr.Column(scale=4):
	# gr.HTML("""
	# <center>
	# <h1><span style="color: purple;">ADWITIYA</span> Customs Manual Quizbot</h1>
	# <h2>Generative AI-powered Capacity building for Training Officers</h2>
	# <i>⚠️ NACIN Faculties create quiz from any topic dynamically for classroom evaluation after their sessions ! ⚠️</i>
	# </center>
	# """)

	# with gr.Column(scale=2):
	# load_btn = gr.Button("Click to Load!🚀")
	# load_text = gr.Textbox()
	# load_btn.click(load_model, [], load_text)

	# topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from Customs Manual")

	# with gr.Row():
	# radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")

	# generate_quiz_btn = gr.Button("Generate Quiz!🚀")
	# quiz_msg = gr.Textbox()

	# question_radios = [gr.Radio(visible=False) for _ in range(10)]

	# #@gr.dependencies.GPU
	# @generate_quiz_btn.click(inputs=[radio, topic], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")], api_name="generate_quiz")
	# def generate_quiz(question_difficulty, topic):
	# top_k_rank = 10
	# RAG_db_ = RAG_db.value
	# documents_full = RAG_db_.search(topic, k=top_k_rank)

	# gr.Warning('Generation of Quiz may take 1 to 2 minutes. Please wait.',duration=60)

	# question_radio_list = []
	# excel_file = None

	# count = 0
	# while count <= 3:
	# try:
	# documents = [item['content'] for item in documents_full]
	# document_summaries = [f"[DOCUMENT {i + 1}]: {summary}{count}" for i, summary in enumerate(documents)]
	# documents_str = '\n'.join(document_summaries)
	# formatted_prompt = system_instructions(question_difficulty, topic, documents_str)

	# print(formatted_prompt)

	# # Use Qwen Client for quiz generation
	# response = client.predict(
	# query=formatted_prompt,
	# history=[],
	# system="You are a helpful assistant.",
	# api_name="/model_chat"
	# )
	# print(response)
	# response1 = response[1][0][1]

	# # Find the first and last curly braces
	# start_index = response1.find('{')
	# end_index = response1.rfind('}')

	# # Extract only the JSON part
	# if start_index != -1 and end_index != -1:
	# cleaned_response = response1[start_index:end_index + 1]

	# # Try parsing the cleaned JSON
	# try:
	# output_json = json.loads(cleaned_response)
	# print('Parsed JSON:', output_json)
	# global quiz_data
	# quiz_data = output_json

	# # Generate the Excel file
	# excel_file = json_to_excel(output_json)

	# for question_num in range(1, 11):
	# question_key = f"Q{question_num}"
	# answer_key = f"A{question_num}"

	# question = quiz_data.get(question_key)
	# answer = quiz_data.get(quiz_data.get(answer_key))

	# if not question or not answer:
	# continue

	# choice_keys = [f"{question_key}:C{i}" for i in range(1, 5)]
	# choice_list = [quiz_data.get(choice_key, "Choice not found") for choice_key in choice_keys]

	# radio = gr.Radio(choices=choice_list, label=question, visible=True, interactive=True)
	# question_radio_list.append(radio)
	# print('question_radio_list', question_radio_list)

	# if len(question_radio_list) == 10:
	# break
	# else:
	# print('10 questions not generated. Trying again!')
	# count += 1
	# continue
	# except json.JSONDecodeError as e:
	# print(f"Failed to decode JSON: {e}")
	# else:
	# print("No valid JSON found in the response.")

	# except Exception as e:
	# count += 1
	# print(f"Exception occurred: {e}")
	# if count == 3:
	# print('Retry exhausted')
	# gr.Warning('Sorry. Please try with another topic!')
	# else:
	# print(f"Trying again.. {count} time... please wait")
	# continue

	# return ['Quiz Generated!'] + question_radio_list + [excel_file]

	# check_button = gr.Button("Check Score")
	# score_textbox = gr.Markdown()

	# @check_button.click(inputs=question_radios, outputs=score_textbox)
	# def compare_answers(*user_answers):
	# user_answer_list = list(user_answers)
	# answers_list = []

	# for question_num in range(1, 20):
	# answer_key = f"A{question_num}"
	# answer = quiz_data.get(quiz_data.get(answer_key))
	# if not answer:
	# break
	# answers_list.append(answer)

	# score = sum(1 for item in user_answer_list if item in answers_list)

	# if score > 7:
	# message = f"### Excellent! You got {score} out of 10!"
	# elif score > 5:
	# message = f"### Good! You got {score} out of 10!"
	# else:
	# message = f"### You got {score} out of 10! Don't worry. You can prepare well and try better next time!"

	# return message

	# QUIZBOT.queue()
	# QUIZBOT.launch(debug=True)

	# ---------------- Active implementation ----------------
	import pandas as pd
	import json
	import gradio as gr
	from pathlib import Path
	from ragatouille import RAGPretrainedModel
	from gradio_client import Client
	from tempfile import NamedTemporaryFile
	from sentence_transformers import CrossEncoder
	import numpy as np
	from time import perf_counter
	from sentence_transformers import CrossEncoder
	from backend.semantic_search import table, retriever

	# Column names used with `table` below: the vector column is passed to
	# table.search(...) and the text column is read from each returned row.
	VECTOR_COLUMN_NAME = "vector"
	TEXT_COLUMN_NAME = "text"
	# Directory the process was started from (current working directory).
	proj_dir = Path.cwd()

	# Set up logging: INFO level on the root logger, plus a module-level logger.
	import logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Gradio client for the "Qwen/Qwen1.5-110B-Chat-demo" Space; it is called
	# through its "/model_chat" endpoint to generate the quiz.
	# (Per the original note, this replaced an earlier Mixtral client.)
	client = Client("Qwen/Qwen1.5-110B-Chat-demo")

	def system_instructions(question_difficulty, topic, documents_str):
	    """Build the quiz-generation prompt sent to the chat model.

	    The prompt asks for 10 four-choice questions of the given difficulty on
	    ``topic``, restricted to ``documents_str``, and spells out the flat JSON
	    key scheme ("Q#", "Q#:C1".."Q#:C4", "A#") the reply must use.

	    Args:
	        question_difficulty: Difficulty label interpolated into the prompt.
	        topic: Quiz topic; placed inside double quotes in the prompt.
	        documents_str: Source passages, already joined into one string.

	    Returns:
	        The complete prompt string wrapped in ``<s> [INST] ... [/INST]``.
	    """
	    task_part = (
	        f'<s> [INST] You are a great teacher and your task is to create 10 questions '
	        f'with 4 choices with {question_difficulty} difficulty about the topic request '
	        f'"{topic}" only from the below given documents, {documents_str}. '
	    )
	    format_part = (
	        'Then create answers. Index in JSON format, the questions as "Q#":"" to "Q#":"", '
	        'the four choices as "Q#:C1":"" to "Q#:C4":"", '
	        'and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". '
	        "Example: 'A10':'Q10:C3' [/INST]"
	    )
	    return task_part + format_part

	# Holder for the loaded ColBERT (RAGatouille) index: generate_quiz assigns
	# the index to RAG_db.value and then searches through RAG_db.value.
	# NOTE(review): this gr.State is created outside the gr.Blocks context and
	# is only used as a plain attribute container, not as an event input/output.
	RAG_db = gr.State()

	def json_to_excel(output_json):
	    """Write the generated quiz to a temporary .xlsx file and return its path.

	    Reads questions "Q1".."Q10", their options "Q#:C1".."Q#:C5" and answer
	    references "A#" from ``output_json``; any missing key becomes an empty
	    cell, so the sheet always has ten rows.

	    Args:
	        output_json: Flat dict parsed from the model reply, e.g.
	            ``{"Q1": "...", "Q1:C1": "...", "A1": "Q1:C3"}``.

	    Returns:
	        Path (str) of the spreadsheet. The file is created with
	        ``delete=False``, so it stays on disk after this function returns.
	    """
	    gr.Warning('Generating Shareable file link..', duration=30)

	    data = []
	    for i in range(1, 11):  # The prompt asks for exactly 10 questions
	        question_key = f"Q{i}"
	        question = output_json.get(question_key, '')

	        # "Q3:C2" -> "2": the sheet stores the option number, not the key.
	        # str() guards against the model emitting a bare number instead of
	        # a "Q#:C#" string, which would otherwise crash on .split().
	        correct_answer_key = output_json.get(f"A{i}", '')
	        if correct_answer_key:
	            correct_answer = str(correct_answer_key).split(':')[-1].replace('C', '').strip()
	        else:
	            correct_answer = ''

	        # Always exactly five entries (Option 1..5); absent options are ''.
	        options = [output_json.get(f"{question_key}:C{n}", '') for n in range(1, 6)]

	        data.append([
	            question,           # Question Text
	            "Multiple Choice",  # Question Type
	            *options,           # Option 1..5
	            correct_answer,     # Correct Answer
	            30,                 # Time in seconds
	            '',                 # Image Link
	        ])

	    df = pd.DataFrame(data, columns=[
	        "Question Text",
	        "Question Type",
	        "Option 1",
	        "Option 2",
	        "Option 3",
	        "Option 4",
	        "Option 5",
	        "Correct Answer",
	        "Time in seconds",
	        "Image Link",
	    ])

	    # Only the unique path is needed. Close the handle before pandas reopens
	    # the path: leaving it open leaks a file descriptor per quiz, and an open
	    # NamedTemporaryFile cannot be reopened by name on every platform.
	    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
	        excel_path = temp_file.name
	    df.to_excel(excel_path, index=False)
	    return excel_path

	# UI label of the ColBERT retrieval option (must match the model_radio choices).
	_COLBERT_CHOICE = '(HIGH ACCURATE) ColBERT'

	# UI label of each cross-encoder reranking option -> checkpoint to load.
	_RERANKER_CHECKPOINTS = {
	    '(FAST) MiniLM-L6v2': 'cross-encoder/ms-marco-MiniLM-L-6-v2',
	    '(ACCURATE) BGE reranker': 'BAAI/bge-reranker-base',
	}

	# Cross-encoders already loaded in this process, keyed by checkpoint name,
	# so a model is not re-created on every button click.
	_loaded_rerankers = {}


	def _retrieve_documents(topic, cross_encoder, top_k_rank):
	    """Return up to ``top_k_rank`` passages for ``topic``, most relevant first."""
	    if cross_encoder == _COLBERT_CHOICE:
	        gr.Warning('Retrieving using ColBERT.. First-time query will take a minute for model to load.. please wait')
	        # Load the ColBERT index once and keep it on RAG_db.value for reuse.
	        if RAG_db.value is None:
	            RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
	            RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
	        return [item['content'] for item in RAG_db.value.search(topic, k=top_k_rank)]

	    query_vec = retriever.encode(topic)
	    rows = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
	    documents = [row[TEXT_COLUMN_NAME] for row in rows]

	    checkpoint = _RERANKER_CHECKPOINTS.get(cross_encoder)
	    if checkpoint is None or not documents:
	        # Unknown/missing reranker choice, or nothing to rerank: keep the
	        # vector-search order instead of failing on an unbound model.
	        return documents

	    reranker = _loaded_rerankers.get(checkpoint)
	    if reranker is None:
	        reranker = _loaded_rerankers[checkpoint] = CrossEncoder(checkpoint)

	    cross_scores = reranker.predict([[topic, doc] for doc in documents])
	    best_first = np.argsort(cross_scores)[::-1]
	    return [documents[idx] for idx in best_first[:top_k_rank]]


	def _extract_quiz_json(reply_text):
	    """Parse the outermost ``{...}`` span of the model reply into a dict.

	    Raises:
	        ValueError: if no brace-delimited span exists, the span is not valid
	            JSON (json.JSONDecodeError is a ValueError), or it is not an object.
	    """
	    start_index = reply_text.find('{')
	    end_index = reply_text.rfind('}')
	    if start_index == -1 or end_index == -1:
	        raise ValueError('no JSON object found in the model reply')
	    parsed = json.loads(reply_text[start_index:end_index + 1])
	    if not isinstance(parsed, dict):
	        raise ValueError('model reply JSON is not an object')
	    return parsed


	with gr.Blocks(title="Quiz Maker", theme=gr.themes.Default(primary_hue="green", secondary_hue="green")) as QUIZBOT:
	    with gr.Column(scale=4):
	        gr.HTML("""
	<center>
	<h1><span style="color: purple;">ADWITIYA</span> Customs Manual Quizbot</h1>
	<h2>Generative AI-powered Capacity building for Training Officers</h2>
	<i>⚠️ NACIN Faculties create quiz from any topic dynamically for classroom evaluation after their sessions ! ⚠️</i>
	</center>
	""")

	    topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from Customs Manual")

	    with gr.Row():
	        difficulty_radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
	        model_radio = gr.Radio(choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'],
	                               value='(ACCURATE) BGE reranker', label="Embeddings",
	                               info="First query to ColBERT may take a little time")

	    generate_quiz_btn = gr.Button("Generate Quiz!🚀")
	    quiz_msg = gr.Textbox()

	    question_radios = [gr.Radio(visible=False) for _ in range(10)]

	    # Per-session list of correct answer texts, aligned with the visible
	    # question radios (entry k belongs to question_radios[k]). Written by
	    # generate_quiz and read by compare_answers.
	    quiz_answers = gr.State([])

	    @generate_quiz_btn.click(
	        inputs=[difficulty_radio, topic, model_radio],
	        outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel"), quiz_answers],
	    )
	    def generate_quiz(question_difficulty, topic, cross_encoder):
	        """Retrieve passages for ``topic``, ask the model for a quiz, fill the UI.

	        Always returns one value per wired output: the status message, one
	        update for each of the question radios, the Excel path (or None)
	        and the list of correct answers stored in session state.
	        """
	        top_k_rank = 10

	        def failure_outputs(message):
	            # Hide every question and clear the download/state so Gradio
	            # still receives the exact number of outputs it expects.
	            hidden = [gr.Radio(visible=False) for _ in question_radios]
	            return [message] + hidden + [None, []]

	        if not topic or not topic.strip():
	            return failure_outputs('Please enter a topic for the quiz.')

	        gr.Warning('Generating Quiz may take 1-2 minutes. Please wait.', duration=60)

	        documents = _retrieve_documents(topic, cross_encoder, top_k_rank)
	        formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))

	        response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")
	        try:
	            # The reply text is read from response[1][0][1], as before.
	            output_json = _extract_quiz_json(response[1][0][1])
	        except (ValueError, LookupError, TypeError) as e:
	            logger.warning("Failed to decode JSON: %s", e)
	            gr.Warning('Sorry. Please try again or try with another topic!')
	            return failure_outputs('Quiz generation failed. Please try again.')

	        # Generate the Excel file
	        excel_file = json_to_excel(output_json)

	        radio_updates = []
	        correct_answers = []
	        for question_num in range(1, len(question_radios) + 1):
	            question_key = f"Q{question_num}"
	            question = output_json.get(question_key)

	            # "A#" holds a reference such as "Q3:C2"; resolve it to the
	            # choice text, which is what the radio reports as its value.
	            answer_ref = output_json.get(f"A{question_num}")
	            answer = output_json.get(answer_ref) if isinstance(answer_ref, str) else None

	            if not question or not answer:
	                continue

	            choice_list = [output_json.get(f"{question_key}:C{i}", "Choice not found") for i in range(1, 5)]
	            # value=None clears a selection left over from a previous quiz.
	            radio_updates.append(gr.Radio(choices=choice_list, label=question, value=None, visible=True, interactive=True))
	            correct_answers.append(answer)

	        if not radio_updates:
	            return failure_outputs('No usable questions were generated. Please try again.')

	        # Pad with hidden radios when the model produced fewer usable questions.
	        missing = len(question_radios) - len(radio_updates)
	        radio_updates += [gr.Radio(visible=False) for _ in range(missing)]

	        return ['Quiz Generated!'] + radio_updates + [excel_file, correct_answers]

	    check_button = gr.Button("Check Score")
	    score_textbox = gr.Markdown()

	    @check_button.click(inputs=question_radios + [quiz_answers], outputs=score_textbox)
	    def compare_answers(*user_answers):
	        """Score the selected answers against the stored correct answers.

	        ``user_answers`` carries the value of every question radio followed,
	        as its last element, by the session's list of correct answers.
	        Returns a Markdown message with the score.
	        """
	        *selected, correct_answers = user_answers

	        if not correct_answers:
	            return "### Please generate a quiz before checking your score."

	        # Compare position by position: a pick only counts for its own question.
	        total = len(correct_answers)
	        score = sum(1 for picked, expected in zip(selected, correct_answers) if picked == expected)

	        # Thresholds are 70% / 50% of the questions (i.e. >7 and >5 out of 10).
	        if score * 10 > 7 * total:
	            message = f"### Excellent! You got {score} out of {total}!"
	        elif score * 10 > 5 * total:
	            message = f"### Good! You got {score} out of {total}!"
	        else:
	            message = f"### You got {score} out of {total}! Don't worry. You can prepare well and try better next time!"

	        return message

	QUIZBOT.queue()
	QUIZBOT.launch(debug=True)