Spaces:
Running
Running
File size: 5,579 Bytes
5144ac6 e51d2b2 13da39a 5144ac6 5dc2718 e51d2b2 5dc2718 e51d2b2 81fefa5 4faaac7 81fefa5 4faaac7 13da39a c5639ef 81fefa5 5144ac6 e51d2b2 5144ac6 e51d2b2 5144ac6 e51d2b2 5144ac6 e51d2b2 5144ac6 4faaac7 c5639ef 4faaac7 c5639ef 5144ac6 e51d2b2 5dc2718 e51d2b2 5dc2718 5144ac6 e51d2b2 5144ac6 e51d2b2 5144ac6 e51d2b2 5144ac6 e51d2b2 5144ac6 e51d2b2 5144ac6 e51d2b2 5144ac6 e51d2b2 5144ac6 c5639ef 4faaac7 e51d2b2 5144ac6 e51d2b2 5144ac6 e51d2b2 8088ee4 5144ac6 8088ee4 5144ac6 8088ee4 5144ac6 a2dbc0f 5144ac6 e51d2b2 5144ac6 a2dbc0f 12f0b8d a2dbc0f 5144ac6 5dc2718 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import gradio as gr
import time
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from TTS.api import TTS # Coqui TTS library
import PyPDF2
# ---- Model initialization (runs once, at import time; downloads weights on first run) ----
# Speech-to-text: smallest Whisper checkpoint — chosen for latency over accuracy.
stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny") # Fast STT model
# Sentence embeddings: used to match the candidate's answer against resume sections.
embedding_model = SentenceTransformer("paraphrase-MiniLM-L6-v2") # Optimized embedding model
# Follow-up question generation: lightweight seq2seq model + its tokenizer.
gpt_model_name = "google/flan-t5-small" # Lightweight question generation model
gpt_tokenizer = AutoTokenizer.from_pretrained(gpt_model_name)
gpt_model = AutoModelForSeq2SeqLM.from_pretrained(gpt_model_name)
# Text-to-speech: CPU-only; progress bar disabled to keep server logs clean.
tts_model = TTS(model_name="tts_models/en/ljspeech/glow-tts", progress_bar=False, gpu=False) # Efficient TTS model
def parse_resume(pdf):
    """Extract all text from a PDF resume.

    Args:
        pdf: File path or file-like object accepted by ``PyPDF2.PdfReader``
            (Gradio's ``gr.File`` upload at the call site).

    Returns:
        dict: ``{"Resume Content": text}`` where *text* is the newline-joined
        text of every page. Pages yielding no text (``None`` or ``""``) are
        skipped.
    """
    reader = PyPDF2.PdfReader(pdf)
    # Extract each page's text exactly once; the original called
    # page.extract_text() twice per page (once in the filter, once for
    # the value), doubling the parse work.
    page_texts = (page.extract_text() for page in reader.pages)
    text = "\n".join(t for t in page_texts if t)
    return {"Resume Content": text}
# Embed the uploaded resume and the pasted job description.
def process_inputs(resume, job_desc):
    """Encode the resume sections and the job description.

    Returns:
        tuple: ``(resume_embeddings, job_desc_embedding)`` — a dict mapping
        each resume section name to its embedding vector, and the embedding
        of the job description text.
    """
    sections = parse_resume(resume)
    resume_embeddings = {}
    for section_name, section_text in sections.items():
        resume_embeddings[section_name] = embedding_model.encode(section_text)
    return resume_embeddings, embedding_model.encode(job_desc)
# Generate a follow-up question using Flan-T5
def generate_question(response, resume_embeddings, job_desc):
    """Produce a follow-up interview question via Flan-T5.

    Embeds the candidate's spoken *response*, finds the resume section
    whose embedding is most cosine-similar to it, and prompts the model
    to ask a question about that section.

    NOTE(review): ``job_desc`` (an embedding at the call site, despite the
    name) is never used in the prompt — confirm whether it should actually
    influence generation.
    """
    answer_vec = embedding_model.encode(response)
    # Rank every resume section by similarity to the answer; keep the best.
    scored = (
        (section, cosine_similarity([answer_vec], [vec])[0][0])
        for section, vec in resume_embeddings.items()
    )
    best_section, _best_score = max(scored, key=lambda pair: pair[1])
    prompt = f"You are a hiring manager. Based on the candidate's experience in {best_section} and the job description, ask a follow-up question."
    encoded = gpt_tokenizer(prompt, return_tensors="pt", truncation=True)
    generated = gpt_model.generate(**encoded, max_length=50, num_beams=3, early_stopping=True)
    return gpt_tokenizer.decode(generated[0], skip_special_tokens=True)
# Generate TTS audio for a question
def generate_audio(question, audio_path="output.wav"):
    """Synthesize *question* to a WAV file and return its path.

    Args:
        question: Text to speak.
        audio_path: Destination file. Defaults to ``"output.wav"`` to
            preserve the original hard-coded behavior; callers can now
            supply their own path (e.g. a per-session temp file) to avoid
            concurrent sessions overwriting each other's audio.

    Returns:
        str: Path of the written audio file.
    """
    tts_model.tts_to_file(text=question, file_path=audio_path)
    return audio_path
# Conduct a mock interview
class MockInterview:
    """Stateful driver for a single mock-interview session.

    Holds the embeddings produced from the uploaded resume and job
    description, plus the question most recently asked, and exposes the
    three session transitions: start, next interaction, end.
    """

    def __init__(self):
        self.resume_embeddings = None   # dict: section name -> embedding
        self.job_desc_embedding = None  # embedding vector of the job description
        self.interview_active = False   # True between start_interview and end_interview
        self.current_question = None    # text of the question last asked

    def start_interview(self, resume, job_desc):
        """Embed the inputs, activate the session, ask the fixed opener.

        Returns:
            tuple: (question text, path to its synthesized audio).
        """
        self.resume_embeddings, self.job_desc_embedding = process_inputs(resume, job_desc)
        self.interview_active = True
        self.current_question = "Tell me about yourself."
        return self.current_question, generate_audio(self.current_question)

    def next_interaction(self, user_audio):
        """Transcribe the candidate's reply and ask the next question.

        Returns:
            tuple: (transcription or status message, audio path or None).
        """
        if not self.interview_active:
            return "Interview not started.", None
        # Gradio's `change` event also fires when the recording is cleared,
        # delivering None; guard instead of crashing the STT pipeline.
        if user_audio is None:
            return "No response detected. Please try again.", None
        transcription = stt_model(user_audio)["text"]
        if not transcription.strip():
            return "No response detected. Please try again.", None
        # Generate the next question from the transcription.
        self.current_question = generate_question(
            transcription, self.resume_embeddings, self.job_desc_embedding
        )
        return transcription, generate_audio(self.current_question)

    def end_interview(self):
        """Deactivate the session and return a farewell message plus audio."""
        self.interview_active = False
        return "Thank you for participating in the interview.", generate_audio("Thank you for participating in the interview. Goodbye!")
# Module-level singleton: one interview session shared by all the Gradio
# callbacks below (so concurrent users would share state — single-user app).
mock_interview = MockInterview()
def start_interview(resume, job_desc):
    """Gradio callback: begin the interview; only the question audio feeds the UI."""
    _question_text, question_audio = mock_interview.start_interview(resume, job_desc)
    return question_audio
def process_response(user_audio):
    """Gradio callback: transcribe the answer and return the next question's audio."""
    # next_interaction already returns the (transcription, audio) pair in
    # exactly the order the outputs expect, so pass it straight through.
    return mock_interview.next_interaction(user_audio)
def finalize_interview():
    """Gradio callback: end the session; only the farewell audio feeds the UI.

    NOTE(review): this callback is not wired to any UI event below — confirm
    whether an "End Interview" button was intended.
    """
    _message, farewell_audio = mock_interview.end_interview()
    return farewell_audio
# Gradio Interface
# Layout: instructions on top; left column collects the resume + job
# description, right column plays the current question audio and shows the
# transcription of the last answer; the mic input at the bottom drives the
# question/answer loop via its `change` event.
interface = gr.Blocks()
with interface:
    gr.Markdown("""
## π§βπΌ Mock Interview AI
Welcome to the Mock Interview simulator! Follow these steps:
1. Upload your resume (PDF format).
2. Paste the job description.
3. Press "Submit" to start the interview.
The system will ask questions and listen to your responses automatically. Good luck!
""")
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### Upload Details")
            resume_input = gr.File(label="π Upload Resume (PDF)")
            job_desc_input = gr.Textbox(
                label="π Paste Job Description",
                placeholder="Paste the job description here...",
                lines=5,
            )
            submit_button = gr.Button("π Submit & Start Interview")
        with gr.Column(scale=1):
            gr.Markdown("### Interview Progress")
            question_audio_output = gr.Audio(label="π€ Question Audio")
            transcription_output = gr.Textbox(label="ποΈ Your Transcription", lines=3)
    gr.Markdown("### Respond to Questions")
    audio_input = gr.Audio(type="filepath", label="ποΈ Speak Your Answer") # Fixed Gradio audio input
    # Submit starts the interview and plays the opening question.
    submit_button.click(
        start_interview, inputs=[resume_input, job_desc_input], outputs=[question_audio_output]
    )
    # Each new/changed recording is transcribed and answered with the next
    # question's audio. (NOTE(review): finalize_interview is never wired to
    # any event — verify whether an end-interview control was intended.)
    audio_input.change(
        process_response, inputs=[audio_input], outputs=[transcription_output, question_audio_output]
    )

if __name__ == "__main__":
    interface.launch()
|