# ADWITIYA Customs Manual Quizbot


import pandas as pd
import json
import gradio as gr
from pathlib import Path
from ragatouille import RAGPretrainedModel
from gradio_client import Client
from tempfile import NamedTemporaryFile
from sentence_transformers import CrossEncoder
import numpy as np
from time import perf_counter
from sentence_transformers import CrossEncoder
from backend.semantic_search import table, retriever

# Column names used against the vector table imported from backend.semantic_search:
# VECTOR_COLUMN_NAME is passed as the search column, TEXT_COLUMN_NAME is the key
# read from each returned row.
VECTOR_COLUMN_NAME = "vector"
TEXT_COLUMN_NAME = "text"
# Working directory captured at import time (not referenced in the visible part of this file).
proj_dir = Path.cwd()

# Set up logging: configure the root logger at INFO and create this module's logger.
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# gradio_client Client for "Qwen/Qwen1.5-110B-Chat-demo", created once at import time.
# This Qwen client replaced an earlier Mixtral client.
client = Client("Qwen/Qwen1.5-110B-Chat-demo")

def system_instructions(question_difficulty, topic, documents_str):
    """Compose the quiz-generation prompt sent to the chat model.

    The three arguments are interpolated verbatim into a fixed instruction
    that asks for 10 four-choice questions and describes the flat JSON key
    scheme ("Q#", "Q#:C1".."Q#:C4", "A#") the rest of the app parses.

    Args:
        question_difficulty: Difficulty wording inserted into the prompt.
        topic: Topic requested by the user; wrapped in double quotes.
        documents_str: Source passages the questions must be drawn from.

    Returns:
        The prompt string, wrapped in ``<s> [INST] ... [/INST]`` markers.
    """
    # What the model is asked to do, and from which material.
    task = (
        'You are a great teacher and your task is to create 10 questions with 4 choices '
        f'with {question_difficulty} difficulty about the topic request "{topic}" '
        f'only from the below given documents, {documents_str}. Then create answers.'
    )
    # How the model must key its JSON answer.
    answer_format = (
        'Index in JSON format, the questions as "Q#":"" to "Q#":"", '
        'the four choices as "Q#:C1":"" to "Q#:C4":"", '
        'and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". '
        "Example: 'A10':'Q10:C3'"
    )
    return f"<s> [INST] {task} {answer_format} [/INST]"

# Module-level holder for the retrieval index: generate_quiz stores the loaded
# ColBERT index on RAG_db.value before searching it.
RAG_db = gr.State()
# Most recently parsed quiz JSON; written by generate_quiz and read by compare_answers.
# NOTE(review): this is a plain module global, so presumably it is shared by every
# browser session of the app — confirm that is acceptable.
quiz_data = None
def json_to_excel(output_json):
    """Write the generated quiz to a temporary .xlsx file and return its path.

    One row is produced for each of the question numbers 1..10, whether or
    not the model actually supplied that question (missing values become
    empty strings). Columns: question text, the fixed type "Multiple Choice",
    five option slots ("Q#:C1".."Q#:C5"), the correct option number, a fixed
    30-second time limit and an empty image link.

    Args:
        output_json: Flat dict parsed from the model reply, keyed "Q#",
            "Q#:C#" and "A#" (an "A#" value looks like "Q3:C2").

    Returns:
        Path of the written .xlsx file. The file is created with
        ``delete=False`` so it outlives this call; the caller owns cleanup.
    """
    gr.Warning('Generating Shareable file link..', duration=30)

    rows = []
    for question_num in range(1, 11):  # The prompt asks for exactly 10 questions
        question_key = f"Q{question_num}"
        answer_ref = output_json.get(f"A{question_num}", '')

        # "Q3:C2" -> "2". str() keeps a non-string model value from crashing the export.
        correct_answer = (
            str(answer_ref).split(':')[-1].replace('C', '').strip() if answer_ref else ''
        )

        # Always exactly five option slots; absent choices become ''.
        options = [output_json.get(f"{question_key}:C{n}", '') for n in range(1, 6)]

        rows.append([
            output_json.get(question_key, ''),  # Question Text
            "Multiple Choice",                  # Question Type
            *options,                           # Option 1 .. Option 5
            correct_answer,                     # Correct Answer
            30,                                 # Time in seconds
            '',                                 # Image Link
        ])

    df = pd.DataFrame(rows, columns=[
        "Question Text",
        "Question Type",
        "Option 1",
        "Option 2",
        "Option 3",
        "Option 4",
        "Option 5",
        "Correct Answer",
        "Time in seconds",
        "Image Link",
    ])

    # Reserve a unique path, then close the handle before pandas opens the file:
    # this avoids leaking the descriptor and works on platforms that refuse to
    # reopen a temp file that is still open.
    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name
    df.to_excel(excel_path, index=False)
    return excel_path
# Define a colorful theme based on Gradio's Default theme
colorful_theme = gr.themes.Default(
    primary_hue="cyan",      # Cyan as the primary hue
    secondary_hue="yellow", # Yellow as the secondary hue
    neutral_hue="purple"  # Purple as the neutral hue
        
)
#with gr.Blocks(title="Quiz Maker", theme=gr.themes.Default(primary_hue="green", secondary_hue="green")) as QUIZBOT:
with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
    
    
    # Header row: logo image in the narrower column (scale=2), title HTML in the wider one (scale=6)
    with gr.Row():
        with gr.Column(scale=2):
            gr.Image(value='logo.png', height=200, width=200)
        with gr.Column(scale=6):
            gr.HTML("""
            <center>
                <h1><span style="color: purple;">ADWITIYA</span> Customs Manual Quizbot</h1>
                <h2>Generative AI-powered Capacity building for Training Officers</h2>
                <i>⚠️ NACIN Faculties create quiz from any topic dynamically for classroom evaluation after their sessions ! ⚠️</i>
            </center>
            """)
        

    # Free-text quiz topic; wired below as the `topic` input of generate_quiz
    topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from Customs Manual")

    # Difficulty and retrieval-model selectors; both are inputs of generate_quiz
    with gr.Row():
        difficulty_radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
        model_radio = gr.Radio(choices=[ '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'], 
                               value='(ACCURATE) BGE reranker', label="Embeddings", 
                               info="First query to ColBERT may take a little time")

    generate_quiz_btn = gr.Button("Generate Quiz!🚀")
    # Status textbox; first output of generate_quiz
    quiz_msg = gr.Textbox()

    # Ten hidden placeholder radios: generate_quiz returns updates for them and
    # compare_answers receives their selected values
    question_radios = [gr.Radio(visible=False) for _ in range(10)]

    @generate_quiz_btn.click(inputs=[difficulty_radio, topic, model_radio], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")])
    def generate_quiz(question_difficulty, topic, cross_encoder):
        """Generate a quiz on ``topic`` and return updates for every bound output.

        Pipeline: retrieve up to 10 passages (ColBERT index, or vector search
        followed by BGE cross-encoder re-ranking), build the prompt with
        ``system_instructions``, call the remote chat model, parse the JSON
        object out of its reply, export it with ``json_to_excel`` and build
        one visible radio per usable question.

        Args:
            question_difficulty: Value of the difficulty radio.
            topic: Topic text entered by the user.
            cross_encoder: Value of the "Embeddings" radio. Anything other
                than the ColBERT choice uses vector search + BGE re-ranking.

        Returns:
            A list with exactly ``1 + len(question_radios) + 1`` items: status
            message, one radio update per slot (unused slots are hidden) and
            the Excel path, or ``None`` for the file on failure. The length is
            constant because Gradio expects one value per declared output.

        Side effects:
            On success, stores the parsed quiz in the module global
            ``quiz_data`` for ``compare_answers``.
        """
        global quiz_data
        top_k_rank = 10
        slot_count = len(question_radios)

        def _failure_outputs(message):
            # Same shape as the success return, so a failure surfaces as a
            # readable status instead of an output-count mismatch.
            hidden = [gr.Radio(value=None, visible=False) for _ in range(slot_count)]
            return [message] + hidden + [None]

        if not topic or not topic.strip():
            return _failure_outputs('Please enter a topic for the quiz.')

        gr.Warning('Generating Quiz may take 1-2 minutes. Please wait.', duration=60)

        try:
            # NOTE(review): both retrieval models are loaded again on every click;
            # caching them would speed up repeated queries.
            if cross_encoder == '(HIGH ACCURATE) ColBERT':
                gr.Warning('Retrieving using ColBERT.. First-time query will take 2 minute for model to load.. please wait',duration=100)
                RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
                RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
                documents_full = RAG_db.value.search(topic, k=top_k_rank)
                documents = [item['content'] for item in documents_full]
            else:
                # Any non-ColBERT selection (including a cleared radio) falls back
                # to the BGE re-ranker, so the re-ranker is always defined.
                query_vec = retriever.encode(topic)
                hits = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
                documents = [hit[TEXT_COLUMN_NAME] for hit in hits]
                if documents:
                    reranker = CrossEncoder('BAAI/bge-reranker-base')
                    cross_scores = reranker.predict([[topic, doc] for doc in documents])
                    # Highest cross-encoder score first.
                    best_first = np.argsort(cross_scores)[::-1]
                    documents = [documents[idx] for idx in best_first[:top_k_rank]]

            if not documents:
                return _failure_outputs('No relevant passages were found for this topic.')

            formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
            logger.info('Formatted Prompt : %s', formatted_prompt)

            response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")
            response1 = response[1][0][1]

            # The reply may wrap the JSON in prose: keep the outermost {...} span.
            start_index = response1.find('{')
            end_index = response1.rfind('}')
            cleaned_response = response1[start_index:end_index + 1] if start_index != -1 and end_index != -1 else ''
            logger.info('Cleaned Response : %s', cleaned_response)
            output_json = json.loads(cleaned_response)

            question_radio_list = []
            for question_num in range(1, slot_count + 1):
                question_key = f"Q{question_num}"
                question = output_json.get(question_key)
                # "A#" holds a reference such as "Q3:C2"; resolve it to the choice text.
                answer_ref = output_json.get(f"A{question_num}")
                answer = output_json.get(answer_ref) if isinstance(answer_ref, str) else None

                # Skip questions the model left incomplete.
                if not question or not answer:
                    continue

                choice_list = [output_json.get(f"{question_key}:C{i}", "Choice not found") for i in range(1, 5)]
                # value=None clears any selection left over from a previous quiz.
                question_radio_list.append(
                    gr.Radio(choices=choice_list, value=None, label=question, visible=True, interactive=True)
                )

            if not question_radio_list:
                return _failure_outputs('The model did not return any usable questions. Please try again.')

            excel_file = json_to_excel(output_json)
        except json.JSONDecodeError as e:
            logger.error("Failed to decode JSON: %s", e)
            return _failure_outputs('Quiz generation failed: the model did not return valid JSON. Please try again.')
        except Exception:
            # Event-handler boundary: log the traceback and report in the UI.
            logger.exception("Quiz generation failed")
            return _failure_outputs('Quiz generation failed. Please try again.')

        # Publish the quiz for compare_answers only once everything succeeded.
        quiz_data = output_json

        # Hide the slots that received no question so the output count stays fixed.
        padding = [gr.Radio(value=None, visible=False) for _ in range(slot_count - len(question_radio_list))]
        return ['Quiz Generated!'] + question_radio_list + padding + [excel_file]

    # Button that triggers compare_answers, and the Markdown area its score message is written to
    check_button = gr.Button("Check Score")
    score_textbox = gr.Markdown()

    @check_button.click(inputs=question_radios, outputs=score_textbox)
    def compare_answers(*user_answers):
        """Score the selected radio values against the last generated quiz.

        Correct answers are collected with the same rule ``generate_quiz``
        uses to decide which questions get a radio (question text present and
        "A#" resolving to a non-empty choice), so the k-th radio is compared
        with the k-th displayed question only.

        Args:
            *user_answers: Selected value of each question radio, in order
                (``None`` for unanswered or hidden radios).

        Returns:
            A Markdown message with the score, or a hint when no quiz is
            available yet.
        """
        # quiz_data stays None until generate_quiz has succeeded at least once.
        if not quiz_data:
            return "### Please generate a quiz before checking your score."

        correct_answers = []
        for question_num in range(1, len(user_answers) + 1):
            question = quiz_data.get(f"Q{question_num}")
            # "A#" holds a reference such as "Q3:C2"; resolve it to the choice text.
            answer_ref = quiz_data.get(f"A{question_num}")
            answer = quiz_data.get(answer_ref) if isinstance(answer_ref, str) else None
            if not question or not answer:
                continue
            correct_answers.append(answer)

        total = len(correct_answers)
        if total == 0:
            return "### No questions are available to score. Please generate a quiz first."

        # Positional comparison: an option only scores for its own question.
        # zip() drops the trailing radios that have no question behind them.
        score = sum(
            1 for given, expected in zip(user_answers, correct_answers) if given == expected
        )

        # Same 70% / 50% cut-offs as "> 7" / "> 5" out of 10, scaled to the real total.
        if score * 10 > 7 * total:
            message = f"### Excellent! You got {score} out of {total}!"
        elif score * 10 > 5 * total:
            message = f"### Good! You got {score} out of {total}!"
        else:
            message = f"### You got {score} out of {total}! Don't worry. You can prepare well and try better next time!"

        return message

# Runs at import time: turn on Gradio's event queue, then start the app with debug=True.
QUIZBOT.queue()
QUIZBOT.launch(debug=True)