Spaces:

danrdoran
/

ai_tutor_flan_t5_large

Sleeping

App Files Files Community

ai_tutor_flan_t5_large / app.py

danrdoran

Create app.py

8f92739 verified about 1 month ago

raw

history blame

2.35 kB

	import streamlit as st
	from transformers import T5ForConditionalGeneration, T5Tokenizer
	from peft import get_peft_model, LoraConfig

	# LoRA hyperparameters; intended to mirror the configuration used during fine-tuning.
	lora_config = LoraConfig(
	    r=8,  # rank of the low-rank update matrices
	    lora_alpha=32,  # scaling parameter for the LoRA update
	    lora_dropout=0.1,  # dropout rate on the LoRA branch
	    target_modules=["q", "v"],  # attention modules that receive LoRA adapters
	    bias="none",
	)


	@st.cache_resource
	def load_model_and_tokenizer():
	    """Build the LoRA-wrapped FLAN-T5 model and its tokenizer once per process.

	    Streamlit re-executes this script from top to bottom on every widget
	    interaction; caching the loader keeps the large model from being
	    re-instantiated on each rerun.

	    Returns:
	        A ``(model, tokenizer)`` tuple ready for inference.
	    """
	    base_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
	    # NOTE(review): get_peft_model attaches newly initialised LoRA adapters to
	    # the base model; nothing here loads fine-tuned adapter weights. If trained
	    # adapter weights are published (presumably next to the tokenizer repo --
	    # confirm), they should be loaded with PeftModel.from_pretrained instead.
	    peft_model = get_peft_model(base_model, lora_config)
	    # Inference only: switch off dropout (including the LoRA dropout).
	    peft_model.eval()
	    tok = T5Tokenizer.from_pretrained("danrdoran/flan-t5-simplified-squad")
	    return peft_model, tok


	# Module-level names used by the rest of the script.
	model, tokenizer = load_model_and_tokenizer()

	# Page header
	st.title("AI English Tutor")
	st.write("Ask me a question, and I will help you!")

	# Generation controls are grouped in the sidebar.
	with st.sidebar:
	    st.title("Model Parameters")
	    temperature = st.slider("Temperature", min_value=0.1, max_value=1.5, value=1.0, step=0.1)
	    top_p = st.slider("Top-p (Nucleus Sampling)", min_value=0.0, max_value=1.0, value=0.9, step=0.05)
	    top_k = st.slider("Top-k", min_value=0, max_value=100, value=50, step=1)
	    # Sampling toggle; unchecked by default.
	    do_sample = st.checkbox("Enable Random Sampling", value=False)

	# Free-text question typed by the student.
	student_question = st.text_input("Ask your question!")

	# Generate and display an answer once the student has typed a non-blank question.
	if student_question and student_question.strip():
	    # Wrap the question in the tutoring instruction prompt.
	    input_text = f"You are a tutor. Explain the answer to this question to a young student: '{student_question}'"
	    # Prompts longer than 256 tokens are truncated.
	    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=256)

	    generation_kwargs = {
	        # Explicit output budget; without it generation falls back to the
	        # library's short default length and explanations get cut off.
	        "max_new_tokens": 128,
	        "no_repeat_ngram_size": 3,  # never repeat the same 3-token sequence
	        # Honour the sidebar checkbox instead of always sampling.
	        "do_sample": do_sample,
	    }
	    if do_sample:
	        # These knobs only have an effect when sampling is enabled.
	        generation_kwargs.update(temperature=temperature, top_p=top_p, top_k=top_k)

	    generated_ids = model.generate(
	        input_ids=inputs["input_ids"],
	        attention_mask=inputs["attention_mask"],
	        **generation_kwargs,
	    )

	    # Decode the first (only) generated sequence back to text.
	    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

	    st.write("Tutor's Answer:", response)