# SOP_Generator / app.py
# harshagnihotri14's picture
# Update app.py
# 6dec878 verified
import gradio as gr
from transformers import GPT2Tokenizer, GPTNeoForCausalLM
import torch
import traceback
import gc
import psutil
import time
# Make sure the cyclic garbage collector is switched on.
gc.enable()

# When a CUDA device is present, ask cuDNN for deterministic behaviour and
# turn off its benchmark mode.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Module-level handles, filled in by load_model() the first time they are needed.
model = tokenizer = None
def log_memory_usage():
    """Print this process's resident set size, scaled as bytes / 1024 / 1024."""
    rss_bytes = psutil.Process().memory_info().rss
    print(f"Memory usage: {rss_bytes / 1024 / 1024:.2f} MB")
def load_model():
    """Load the tokenizer and model into the module-level globals.

    Both are fetched from the "harshagnihotri14/SOP_Generator" repository.
    Memory usage is logged before and after loading.

    Returns:
        bool: True when both objects were loaded, False if any exception was
        raised along the way (the exception message is printed).
    """
    global model, tokenizer

    try:
        print("Starting model loading...")
        log_memory_usage()

        tokenizer = GPT2Tokenizer.from_pretrained(
            "harshagnihotri14/SOP_Generator", local_files_only=False
        )
        model = GPTNeoForCausalLM.from_pretrained(
            "harshagnihotri14/SOP_Generator",
            low_cpu_mem_usage=True,
            torch_dtype=torch.float32,
            device_map='auto',
        )

        print("Model loaded successfully")
        log_memory_usage()
    except Exception as exc:
        # Loading is best-effort: report the problem and let the caller decide.
        print(f"Error loading model: {str(exc)}")
        return False
    else:
        return True
def cleanup():
    """Release memory after a generation run and log the resulting usage.

    Runs a garbage-collection pass, empties the CUDA cache when CUDA is
    available, then prints the current memory usage.
    """
    gc.collect()

    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()

    log_memory_usage()
def generate_sop_with_retry(name, course_name, university, university_location, academic_background,
                            marks, subjects, work_experience, extracurricular_activities,
                            interests, short_term_goals, long_term_goals, max_retries=2):
    """Generate an SOP, retrying when an attempt fails or is too short.

    Each attempt calls ``generate_sop_attempt`` with the twelve applicant
    fields. An attempt is accepted when it is a string, does not contain
    "Error:", and has at least 800 whitespace-separated words.

    Args:
        name .. long_term_goals: Applicant details forwarded unchanged to
            ``generate_sop_attempt``.
        max_retries: Maximum number of attempts to make.

    Returns:
        str: The first accepted SOP. If every attempt reported an error, the
        last error message is returned so the real cause (for example a model
        loading failure) is not hidden. Otherwise a generic "Error: ..."
        message saying the output was not detailed enough.
    """
    last_error = None
    saw_short_output = False

    for _ in range(max_retries):
        sop = generate_sop_attempt(name, course_name, university, university_location,
                                   academic_background, marks, subjects, work_experience,
                                   extracurricular_activities, interests, short_term_goals,
                                   long_term_goals)
        if not isinstance(sop, str):
            continue
        if "Error:" in sop:
            # Remember why this attempt failed instead of silently dropping it.
            last_error = sop
            continue

        word_count = len(sop.split())
        if word_count >= 800:
            return sop
        saw_short_output = True

    # Only blame the input detail level when generation actually ran; if all
    # attempts errored out, surface the underlying error instead.
    if last_error is not None and not saw_short_output:
        return last_error
    return "Error: Unable to generate a sufficiently detailed SOP after multiple attempts. Please try again with more detailed input information."
def _build_sop_prompt(name, course_name, university, university_location, academic_background,
                      marks, subjects, work_experience, extracurricular_activities,
                      interests, short_term_goals, long_term_goals):
    """Assemble the instruction prompt that is fed to the language model."""
    return (
        f"Write a Statement of Purpose (SOP) for {name} applying to {course_name} "
        f"at {university}, {university_location}.\n\n"
        f"Academic Background: {academic_background}\n"
        f"Academic Performance: {marks}\n"
        f"Key Subjects: {subjects}\n"
        f"Work Experience: {work_experience}\n"
        f"Extracurricular Activities: {extracurricular_activities}\n"
        f"Interests: {interests}\n"
        f"Short-term Goals: {short_term_goals}\n"
        f"Long-term Goals: {long_term_goals}\n\n"
        "Write a comprehensive SOP (800-1000 words) that includes:\n"
        "1. Strong introduction with personal motivation\n"
        "2. Academic achievements and relevant coursework\n"
        "3. Professional experience and projects\n"
        "4. Research interests and activities\n"
        "5. Extracurricular activities and leadership\n"
        "6. Career goals and program fit\n"
        "7. Conclusion\n\n"
        "BEGIN SOP:\n"
    )


def generate_sop_attempt(name, course_name, university, university_location, academic_background,
                         marks, subjects, work_experience, extracurricular_activities,
                         interests, short_term_goals, long_term_goals, progress=gr.Progress()):
    """Run a single SOP generation pass and return the formatted text.

    Loads the model on first use, builds the prompt from the applicant
    fields, generates a continuation and prefixes it with a short header.

    Args:
        name .. long_term_goals: Applicant details interpolated into the
            prompt; name, course and university also appear in the header.
        progress: Gradio progress tracker, called with a fraction and a
            ``desc`` label at each stage.

    Returns:
        str: The formatted SOP on success. On failure a message containing
        "Error:" is returned instead of raising, which is what
        ``generate_sop_with_retry`` checks for.
    """
    try:
        if model is None or tokenizer is None:
            progress(0.1, desc="Loading model...")
            if not load_model():
                return "Error: Failed to load the model. Please try again."

        progress(0.3, desc="Preparing prompt...")
        prompt = _build_sop_prompt(name, course_name, university, university_location,
                                   academic_background, marks, subjects, work_experience,
                                   extracurricular_activities, interests, short_term_goals,
                                   long_term_goals)

        progress(0.5, desc="Tokenizing input...")
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        # The model is loaded with device_map='auto', so it may not live on the
        # CPU; the input tensors have to be on the same device as the model.
        inputs = inputs.to(model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        progress(0.7, desc="Generating SOP...")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                # NOTE(review): max_length bounds prompt + generated tokens
                # together, so a long prompt leaves less room for the SOP.
                max_length=2048,
                min_length=500,
                temperature=0.7,
                repetition_penalty=1.5,
                num_beams=4,
                length_penalty=2.0,
                top_p=0.85,
                top_k=50,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        progress(0.9, desc="Processing output...")
        # The returned sequence starts with the prompt tokens. Drop them by
        # length rather than searching the decoded text for "BEGIN SOP:",
        # which is missing whenever the prompt was truncated.
        generated_ids = outputs[0][prompt_length:]
        sop_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

        formatted_sop = (
            "STATEMENT OF PURPOSE\n\n"
            f"Name: {name}\n"
            f"Program: {course_name}\n"
            f"University: {university}, {university_location}\n\n"
            f"{sop_text}"
        )
        progress(1.0, desc="Done!")
        return formatted_sop
    except Exception as e:
        # Log the full traceback server-side; give the user a short message.
        error_msg = f"Error during SOP generation: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return f"An error occurred while generating the SOP. Please try again. Error: {str(e)}"
    finally:
        # Free memory on every exit path, success or failure.
        cleanup()
def generate_sop(*args):
    """Click handler: pass every form value on to the retrying generator."""
    sop_result = generate_sop_with_retry(*args)
    return sop_result
# Gradio UI: a heading, two columns of six text inputs each, a submit button
# and a large output box for the generated SOP.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# Professional SOP Generator
Generate a comprehensive Statement of Purpose for your graduate school application.
**Guidelines:**
- Provide detailed information in all fields
- Include specific examples and achievements
- The generated SOP will be approximately 800-1000 words
""")

    with gr.Row():
        # Left column: who is applying, where, and academic record.
        with gr.Column():
            name = gr.Textbox(
                label="Full Name",
                placeholder="Enter your full name",
            )
            course_name = gr.Textbox(
                label="Course Name",
                placeholder="Enter the program name",
            )
            university = gr.Textbox(
                label="University",
                placeholder="Enter university name",
            )
            university_location = gr.Textbox(
                label="University Location",
                placeholder="City, Country",
            )
            academic_background = gr.Textbox(
                label="Academic Background",
                placeholder="Your current/previous degree",
            )
            marks = gr.Textbox(
                label="Academic Marks/Grades",
                placeholder="Your GPA or percentage",
            )

        # Right column: subjects, experience, interests and goals.
        with gr.Column():
            subjects = gr.Textbox(
                label="Major Subjects",
                placeholder="List your major subjects",
            )
            work_experience = gr.Textbox(
                label="Work Experience",
                placeholder="Relevant work experience",
            )
            extracurricular_activities = gr.Textbox(
                label="Extracurricular Activities",
                placeholder="List your activities",
            )
            interests = gr.Textbox(
                label="Personal Interests",
                placeholder="Your interests and hobbies",
            )
            short_term_goals = gr.Textbox(
                label="Short Term Goals",
                placeholder="Your immediate career goals",
            )
            long_term_goals = gr.Textbox(
                label="Long Term Goals",
                placeholder="Your long-term career aspirations",
            )

    submit = gr.Button("Generate SOP", variant="primary")
    output = gr.Textbox(label="Generated SOP", lines=30)

    # The input order must match the positional parameters of
    # generate_sop_with_retry, which generate_sop forwards to.
    submit.click(
        fn=generate_sop,
        inputs=[
            name,
            course_name,
            university,
            university_location,
            academic_background,
            marks,
            subjects,
            work_experience,
            extracurricular_activities,
            interests,
            short_term_goals,
            long_term_goals,
        ],
        outputs=output,
    )
if __name__ == "__main__":
    # Enable Gradio's request queue with max_size=5.
    demo.queue(max_size=5)

    # Listen on all interfaces on port 7860, with sharing, error display and
    # debug mode switched on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "show_error": True,
        "debug": True,
    }
    demo.launch(**launch_options)