Spaces:

harshagnihotri14
/

SOP_Generator

Sleeping

App Files Files Community

SOP_Generator / app.py

harshagnihotri14

Update app.py

6dec878 verified 4 months ago

raw

history blame contribute delete

8.78 kB

	import gradio as gr
	from transformers import GPT2Tokenizer, GPTNeoForCausalLM
	import torch
	import traceback
	import gc
	import psutil
	import time

	# Make sure the cyclic garbage collector is switched on.
	gc.enable()

	# On CUDA machines, request deterministic cuDNN algorithms and turn the
	# cuDNN benchmark/autotune mode off.
	if torch.cuda.is_available():
	    torch.backends.cudnn.deterministic = True
	    torch.backends.cudnn.benchmark = False

	# Module-level handles; both stay None until load_model() populates them.
	model, tokenizer = None, None

	def log_memory_usage():
	    """Print the current process's resident set size (RSS) in megabytes."""
	    rss_bytes = psutil.Process().memory_info().rss
	    rss_megabytes = rss_bytes / 1024 / 1024
	    print(f"Memory usage: {rss_megabytes:.2f} MB")

	def load_model():
	    """Load the SOP tokenizer and model into the module-level globals.

	    Both artefacts come from the "harshagnihotri14/SOP_Generator" repository.
	    They are loaded into local variables first and only published to the
	    ``tokenizer`` / ``model`` globals once both loads have succeeded, so a
	    failure part-way through never leaves the globals half-initialised.

	    Returns:
	        bool: True when the tokenizer and the model were both loaded;
	        False when any step raised. In that case the error message and its
	        traceback are printed and the globals are left untouched.
	    """
	    global model, tokenizer
	    repo_id = "harshagnihotri14/SOP_Generator"
	    try:
	        print("Starting model loading...")
	        log_memory_usage()

	        loaded_tokenizer = GPT2Tokenizer.from_pretrained(
	            repo_id,
	            local_files_only=False,
	        )

	        loaded_model = GPTNeoForCausalLM.from_pretrained(
	            repo_id,
	            low_cpu_mem_usage=True,
	            torch_dtype=torch.float32,
	            device_map='auto',
	        )

	        # Publish both handles together, only after both loads succeeded.
	        tokenizer, model = loaded_tokenizer, loaded_model

	        print("Model loaded successfully")
	        log_memory_usage()
	        return True
	    except Exception as e:
	        # Deliberate best-effort boundary: the caller only needs a flag, but
	        # the traceback is printed so the root cause is visible in the logs.
	        print(f"Error loading model: {str(e)}")
	        print(traceback.format_exc())
	        return False

	def cleanup():
	    """Run a garbage-collection pass, empty the CUDA cache if any, and log memory."""
	    gc.collect()
	    cuda_present = torch.cuda.is_available()
	    if cuda_present:
	        torch.cuda.empty_cache()
	    log_memory_usage()

	def generate_sop_with_retry(name, course_name, university, university_location, academic_background,
	                            marks, subjects, work_experience, extracurricular_activities,
	                            interests, short_term_goals, long_term_goals, max_retries=2,
	                            min_words=800):
	    """Generate an SOP, retrying while the result is an error or too short.

	    Args:
	        name .. long_term_goals: Applicant details, forwarded unchanged to
	            ``generate_sop_attempt``.
	        max_retries: Maximum number of generation attempts.
	        min_words: Minimum number of whitespace-separated words the returned
	            text must contain to be accepted. Defaults to 800, the threshold
	            that used to be hard-coded.

	    Returns:
	        str: The first attempt that is a string, does not contain "Error:",
	        and has at least ``min_words`` words; otherwise a fixed error message.
	    """
	    for _ in range(max_retries):
	        sop = generate_sop_attempt(name, course_name, university, university_location,
	                                   academic_background, marks, subjects, work_experience,
	                                   extracurricular_activities, interests, short_term_goals,
	                                   long_term_goals)

	        # Failed attempts are reported as strings containing "Error:"; skip them.
	        if not isinstance(sop, str) or "Error:" in sop:
	            continue

	        if len(sop.split()) >= min_words:
	            return sop

	    return "Error: Unable to generate a sufficiently detailed SOP after multiple attempts. Please try again with more detailed input information."

	def _build_sop_prompt(name, course_name, university, university_location, academic_background,
	                      marks, subjects, work_experience, extracurricular_activities,
	                      interests, short_term_goals, long_term_goals):
	    """Assemble the instruction prompt that is fed to the language model."""
	    return (
	        f"Write a Statement of Purpose (SOP) for {name} applying to {course_name} "
	        f"at {university}, {university_location}.\n\n"

	        f"Academic Background: {academic_background}\n"
	        f"Academic Performance: {marks}\n"
	        f"Key Subjects: {subjects}\n"
	        f"Work Experience: {work_experience}\n"
	        f"Extracurricular Activities: {extracurricular_activities}\n"
	        f"Interests: {interests}\n"
	        f"Short-term Goals: {short_term_goals}\n"
	        f"Long-term Goals: {long_term_goals}\n\n"

	        "Write a comprehensive SOP (800-1000 words) that includes:\n"
	        "1. Strong introduction with personal motivation\n"
	        "2. Academic achievements and relevant coursework\n"
	        "3. Professional experience and projects\n"
	        "4. Research interests and activities\n"
	        "5. Extracurricular activities and leadership\n"
	        "6. Career goals and program fit\n"
	        "7. Conclusion\n\n"

	        "BEGIN SOP:\n"
	    )


	def generate_sop_attempt(name, course_name, university, university_location, academic_background,
	                         marks, subjects, work_experience, extracurricular_activities,
	                         interests, short_term_goals, long_term_goals, progress=gr.Progress()):
	    """Run one SOP generation pass and return the formatted text.

	    Loads the model on first use, builds the prompt from the applicant
	    details, samples a continuation, and wraps it in a short header.

	    Args:
	        name .. long_term_goals: Applicant details interpolated into the prompt
	            (``name``, ``course_name``, ``university`` and
	            ``university_location`` also appear in the output header).
	        progress: Gradio progress tracker used to report the current stage.

	    Returns:
	        str: The formatted SOP on success. On failure, a message containing
	        "Error:" (model could not be loaded, or an exception was raised
	        during generation); exceptions are caught, logged and not re-raised.
	    """
	    try:
	        if model is None or tokenizer is None:
	            progress(0.1, desc="Loading model...")
	            if not load_model():
	                return "Error: Failed to load the model. Please try again."

	        progress(0.3, desc="Preparing prompt...")
	        prompt = _build_sop_prompt(
	            name, course_name, university, university_location, academic_background,
	            marks, subjects, work_experience, extracurricular_activities,
	            interests, short_term_goals, long_term_goals,
	        )

	        progress(0.5, desc="Tokenizing input...")
	        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
	        # The model is loaded with device_map='auto', so it may not live on
	        # the CPU; keep the input tensors on the same device as the model.
	        inputs = inputs.to(model.device)
	        prompt_token_count = inputs["input_ids"].shape[1]

	        progress(0.7, desc="Generating SOP...")

	        with torch.no_grad():
	            outputs = model.generate(
	                **inputs,
	                max_length=2048,  # Total length budget (prompt + continuation)
	                min_length=500,
	                temperature=0.7,
	                repetition_penalty=1.5,
	                num_beams=4,
	                length_penalty=2.0,
	                top_p=0.85,
	                top_k=50,
	                do_sample=True,
	                pad_token_id=tokenizer.eos_token_id
	            )

	        progress(0.9, desc="Processing output...")
	        # The returned sequence starts with the prompt tokens. Decode only the
	        # newly generated part so the prompt can never leak into the SOP,
	        # even when truncation removed the "BEGIN SOP:" marker from the prompt.
	        new_tokens = outputs[0][prompt_token_count:]
	        sop_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

	        # Format the final output
	        formatted_sop = (
	            "STATEMENT OF PURPOSE\n\n"
	            f"Name: {name}\n"
	            f"Program: {course_name}\n"
	            f"University: {university}, {university_location}\n\n"
	            f"{sop_text}"
	        )

	        cleanup()
	        progress(1.0, desc="Done!")

	        return formatted_sop

	    except Exception as e:
	        # Boundary handler: free memory, log the full traceback, and hand the
	        # UI a readable message instead of propagating the exception.
	        cleanup()
	        error_msg = f"Error during SOP generation: {str(e)}\n{traceback.format_exc()}"
	        print(error_msg)
	        return f"An error occurred while generating the SOP. Please try again. Error: {str(e)}"

	def generate_sop(*args):
	    """Click handler for the UI: pass the form values on to the retrying generator."""
	    sop_or_error = generate_sop_with_retry(*args)
	    return sop_or_error

	# Gradio front end: a two-column form whose values are sent to generate_sop,
	# with the result shown in a large text box below the button.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown("""
	# Professional SOP Generator
	Generate a comprehensive Statement of Purpose for your graduate school application.

	Guidelines:
	- Provide detailed information in all fields
	- Include specific examples and achievements
	- The generated SOP will be approximately 800-1000 words
	""")

	    with gr.Row():
	        # Left column: applicant, target programme and academic record.
	        with gr.Column():
	            name = gr.Textbox(label="Full Name", placeholder="Enter your full name")
	            course_name = gr.Textbox(label="Course Name", placeholder="Enter the program name")
	            university = gr.Textbox(label="University", placeholder="Enter university name")
	            university_location = gr.Textbox(label="University Location", placeholder="City, Country")
	            academic_background = gr.Textbox(label="Academic Background", placeholder="Your current/previous degree")
	            marks = gr.Textbox(label="Academic Marks/Grades", placeholder="Your GPA or percentage")

	        # Right column: subjects, experience, interests and goals.
	        with gr.Column():
	            subjects = gr.Textbox(label="Major Subjects", placeholder="List your major subjects")
	            work_experience = gr.Textbox(label="Work Experience", placeholder="Relevant work experience")
	            extracurricular_activities = gr.Textbox(label="Extracurricular Activities", placeholder="List your activities")
	            interests = gr.Textbox(label="Personal Interests", placeholder="Your interests and hobbies")
	            short_term_goals = gr.Textbox(label="Short Term Goals", placeholder="Your immediate career goals")
	            long_term_goals = gr.Textbox(label="Long Term Goals", placeholder="Your long-term career aspirations")

	    submit = gr.Button("Generate SOP", variant="primary")
	    output = gr.Textbox(label="Generated SOP", lines=30)

	    # The values are passed positionally, so this order has to match the
	    # parameter order of generate_sop_with_retry.
	    form_fields = [
	        name, course_name, university, university_location, academic_background,
	        marks, subjects, work_experience, extracurricular_activities,
	        interests, short_term_goals, long_term_goals,
	    ]
	    submit.click(fn=generate_sop, inputs=form_fields, outputs=output)

	if __name__ == "__main__":
	    # Enable the request queue; max_size caps how many events may be queued.
	    demo.queue(max_size=5)

	    launch_options = dict(
	        server_name="0.0.0.0",
	        server_port=7860,
	        share=True,
	        show_error=True,
	        debug=True,
	    )
	    demo.launch(**launch_options)