Model Card for Academic-ShortQA-Generator
This is a quantized Mistral 7B model fine-tuned for academic short question-answer (ShortQA) generation. It was fine-tuned with the QLoRA technique for roughly 500 steps, reaching a training loss of about 0.450.
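The exact training script is not published in this card. The snippet below is a minimal sketch of the kind of Unsloth QLoRA setup the description implies; the base checkpoint name, sequence length, LoRA rank, and target modules are illustrative assumptions, not the values actually used.

from unsloth import FastLanguageModel

# Load a 4-bit quantized Mistral 7B base model (checkpoint name is an assumption).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-bnb-4bit",
    max_seq_length=2048,  # assumed context length
    load_in_4bit=True,
)

# Attach LoRA adapters for QLoRA fine-tuning (rank/alpha/target modules are assumptions).
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)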
Requirements
!pip install gradio
!pip install -U xformers --index-url https://download.pytorch.org/whl/cu121
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
import os
os.environ["WANDB_DISABLED"] = "true"  # disable Weights & Biases logging so no W&B account is needed
Gradio App
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import re

# Load the fine-tuned model and its tokenizer from the Hugging Face Hub.
model_id = "DisgustingOzil/Academic-ShortQA-Generator"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""
def partition_text(text, partition_size):
    """Split text into roughly `partition_size` equal word chunks."""
    words = text.split()
    total_words = len(words)
    # Guard against a zero step when the text has fewer words than partitions.
    words_per_partition = max(1, total_words // partition_size)
    partitions = []
    for i in range(0, total_words, words_per_partition):
        partition = " ".join(words[i:i + words_per_partition])
        if len(partition) > 100:  # keep only chunks long enough to yield questions
            partitions.append(partition)
    return partitions
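# Quick sanity check (illustrative; `long_biology_text` is a hypothetical variable):
# parts = partition_text(long_biology_text, 2)
# print(len(parts), parts[0][:80])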
def generate_mcqs_for_partition(Instruction, partition, temperature, top_k):
    """Generate tagged question-answer text for one chunk of the input."""
    inputs = tokenizer(alpaca_prompt.format(Instruction, partition, ""), return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_length=512,
        num_return_sequences=1,
        do_sample=True,  # sampling must be enabled for temperature/top_k to take effect
        temperature=temperature,
        top_k=top_k,
    )
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return output_text
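# The parsing below assumes the model emits each pair in tagged form, e.g.
# (hypothetical output):
# <question>What does photosynthesis produce?</question>
# <answer>Chemical energy stored as glucose.</answer>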
def generate_mcqs(Instruction, text, partition_count, temperature, top_k):
    """Run generation over each text chunk and collect the parsed QA pairs."""
    partitions = partition_text(text, partition_count)
    mcqs_output = []
    for part in partitions:
        output_text = generate_mcqs_for_partition(Instruction, part, temperature, top_k)
        # Extract every <question>...</question> / <answer>...</answer> pair.
        pattern = r'<question>(.*?)</question>.*?<answer>(.*?)</answer>'
        matches = re.findall(pattern, output_text, re.DOTALL)
        for question, correct_answer in matches:
            mcqs_output.append(f"Question: {question.strip()}\nCorrect Answer: {correct_answer.strip()}\n")
    return "\n".join(mcqs_output) if mcqs_output else "No MCQs could be generated from the input."
iface = gr.Interface(
    fn=generate_mcqs,
    inputs=[
        gr.Textbox(label="Instruction"),
        gr.Textbox(lines=10, label="Input Biology Text"),
        gr.Slider(minimum=1, maximum=10, step=1, label="Partition Count"),
        gr.Slider(minimum=0.5, maximum=1.0, step=0.05, label="Temperature"),
        gr.Slider(minimum=1, maximum=50, step=1, label="Top K"),
    ],
    outputs="text",
    title="ShortQA Generator",
    description="Enter a biology text to generate short question-answer pairs. Adjust the sliders to change the model's generation parameters.",
)
if __name__ == "__main__":
    iface.launch(debug=True, share=True)
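With share=True, launch() also prints a temporary public URL for the app, which is convenient when running inside a Kaggle or Colab notebook; set share=False to keep the app local.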