Anupam251272's picture
Update app.py
081374c verified
raw
history blame
18.5 kB
import gradio as gr
import torch
import spacy
import nltk
import re
import PyPDF2
import numpy as np
import pandas as pd
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Fetch the NLTK "punkt" resource at start-up.
nltk.download("punkt")

# Shared, module-level models: the spaCy pipeline used for entity/POS tagging
# and the sentence-embedding model used for similarity scoring.
nlp = spacy.load("en_core_web_sm")
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Report whether torch can see a CUDA device. The value is only printed here;
# nothing in the visible code passes `device` to the models explicitly.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Running on: {device}")
# Career database: one entry per career, each with a display title, a
# one-line description, and the list of skills the career is matched on.
CAREER_RECOMMENDATIONS = [
    {
        "title": "Software Engineer",
        "description": "Develops software applications and systems",
        "skills": [
            "Python", "Java", "C++", "JavaScript", "Software Development",
            "Database Management", "Web Development", "Cloud Computing",
            "Data Structures", "Algorithms",
        ],
    },
    {
        "title": "Data Scientist",
        "description": "Analyzes complex data to help make business decisions",
        "skills": [
            "Python", "R", "Statistics", "Machine Learning",
            "Data Visualization", "Data Analysis", "SQL",
        ],
    },
    {
        "title": "Cloud Solutions Architect",
        "description": "Designs and manages cloud computing strategies",
        "skills": [
            "Cloud Computing", "AWS", "Azure", "GCP",
            "Infrastructure as Code", "Networking",
        ],
    },
    {
        "title": "AI/ML Engineer",
        "description": "Creates intelligent systems and machine learning models",
        "skills": [
            "Machine Learning", "Deep Learning", "Neural Networks",
            "TensorFlow", "PyTorch", "Computer Vision",
            "Natural Language Processing",
        ],
    },
    {
        "title": "Database Administrator",
        "description": "Manage databases, ensure data security",
        "skills": [
            "SQL", "Database Management", "Database Security",
            "Database Design", "Database Modeling",
        ],
    },
    {
        "title": "Mechanical Engineer",
        "description": "Designs, develops, and tests mechanical devices and systems",
        "skills": [
            "CAD", "CAM", "Matlab", "Mechanical Design",
            "Manufacturing Engineering", "Quality Control",
            "Thermal Engineering", "Fluid Mechanics", "GD&T",
            "Engineering Drawings", "Blueprint reading", "Product Design",
            "FEA Analysis",
        ],
    },
    {
        "title": "Manufacturing Engineer",
        "description": "Optimizes manufacturing processes for efficiency and quality",
        "skills": [
            "Manufacturing Engineering", "Process Optimization",
            "Lean Manufacturing", "Six Sigma", "Production Planning",
            "Supply Chain Management",
        ],
    },
    {
        "title": "Quality Engineer",
        "description": "Oversees quality assurance activities and ensures products meet standards.",
        "skills": [
            "Quality Control", "Quality Assurance", "ISO Standards",
            "Statistical Process Control", "Inspection", "Testing",
        ],
    },
    {
        "title": "Design Engineer",
        "description": "Creates product designs and technical drawings using CAD software",
        "skills": [
            "CAD", "CAM", "Product Design", "3D Modeling",
            "Engineering Design", "Drafting",
        ],
    },
    {
        "title": "Business Analyst",
        "description": "Identifies business needs and determines solutions",
        "skills": [
            "Business Analysis", "Requirements Gathering", "Data Analysis",
            "Process Improvement", "Project Management",
        ],
    },
    {
        "title": "Marketing Manager",
        "description": "Develops and implements marketing strategies",
        "skills": [
            "Marketing", "Digital Marketing", "Social Media Marketing",
            "Market Research", "Branding", "Advertising",
            "Content Marketing",
        ],
    },
    {
        "title": "Project Manager",
        "description": "Leads and coordinates project teams and resources",
        "skills": [
            "Project Management", "Project Planning", "Risk Management",
            "Team Management", "Agile Methodologies",
        ],
    },
    {
        "title": "Management Consultant",
        "description": "Advises organizations on improving performance",
        "skills": [
            "Consulting", "Strategy", "Problem Solving",
            "Business Analysis", "Communication",
        ],
    },
    {
        "title": "Graphic Designer",
        "description": "Creates visual concepts using computer software or by hand",
        "skills": [
            "Graphic Design", "Adobe Photoshop", "Adobe Illustrator",
            "UI/UX Design", "Visual Communication", "Branding",
        ],
    },
    {
        "title": "Content Strategist",
        "description": "Develops content plans and marketing strategies",
        "skills": [
            "Content Writing", "Content Strategy", "SEO",
            "Content Marketing", "Copywriting",
        ],
    },
    {
        "title": "UI/UX Designer",
        "description": "Designs user interfaces for digital products",
        "skills": [
            "UI Design", "UX Design", "Wireframing", "Prototyping",
            "User Research", "Interaction Design",
        ],
    },
    {
        "title": "Digital Marketing Specialist",
        "description": "Promotes brands and products through digital channels",
        "skills": [
            "Digital Marketing", "Social Media Marketing", "SEO",
            "PPC Advertising", "Email Marketing", "Content Marketing",
        ],
    },
    {
        "title": "Healthcare Administrator",
        "description": "Manages healthcare facilities and services",
        "skills": [
            "Healthcare Administration", "Healthcare Management",
            "Healthcare Policy", "Healthcare Finance", "Patient Care",
        ],
    },
    {
        "title": "Medical Researcher",
        "description": "Conducts research to improve medical knowledge",
        "skills": [
            "Medical Research", "Data Analysis", "Research Design",
            "Laboratory Techniques", "Scientific Writing",
        ],
    },
    {
        "title": "Healthcare Consultant",
        "description": "Advises healthcare organizations on improvement strategies",
        "skills": [
            "Healthcare Consulting", "Healthcare Strategy",
            "Healthcare Operations", "Healthcare Policy",
        ],
    },
    {
        "title": "Medical Assistant",
        "description": "Assists with patient care and medical administrative tasks.",
        "skills": [
            "Patient Care", "Medical Terminology", "Medical Assisting",
            "Clinical Procedures", "Vital Signs",
            "Electronic Health Records",
        ],
    },
]
def extract_text_from_pdf(file_path):
    """
    Extract the text of every page of a PDF file.

    Args:
        file_path (str): Path to the PDF file
    Returns:
        str: Text of all pages, each page followed by a newline.
            An empty string is returned (and the error printed) if the file
            cannot be opened or parsed.
    """
    try:
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Pages must be read while the file is still open.
            # `or ''` guards against a page that yields no text: without it a
            # None result would raise on concatenation and, through the broad
            # handler below, throw away the text of every other page too.
            page_texts = [
                (page.extract_text() or '') + '\n' for page in reader.pages
            ]
        return ''.join(page_texts)
    except Exception as e:
        # Best-effort by design: callers treat "" as "no text extracted".
        print(f"Error extracting PDF text: {e}")
        return ""
# Words that, at the start of a line, mark the beginning of another CV section
# and therefore end the section currently being captured.
_SECTION_BOUNDARY_WORDS = (
    "education", "work", "experience", "skills", "projects",
    "training", "hobbies", "personal", "declaration",
)

# Section name -> heading keywords that open it (kept in alternation order).
_SECTION_HEADINGS = {
    "education": ("education", "qualification", "academic"),
    "experience": ("experience", "work"),
    "skills": ("skills", "expertise", "technical skills"),
    "projects": ("projects",),
    "training": ("training", "certification"),
    "hobbies": ("hobbies", "interests"),
    "personal": ("personal details",),
}

_CONTACT_PATTERN = re.compile(r"(email|phone|contact)[:\s]*([^\n]+)")


def _build_section_pattern(section, headings):
    """Compile the regex capturing the body that follows one section heading."""
    # A section is never terminated by its own heading words.
    stops = [
        word for word in _SECTION_BOUNDARY_WORDS
        if word != section and word not in headings
    ]
    return re.compile(
        r"(" + "|".join(headings) + r")[:\s]*(.*?)"
        # The body ends at a blank line, at the next section heading, or at
        # end-of-text (\Z) so the final section of a CV is not dropped.
        r"(?=\n\n|\n(?:" + "|".join(stops) + r")|\Z)",
        re.DOTALL | re.IGNORECASE,
    )


_SECTION_PATTERNS = {
    section: _build_section_pattern(section, headings)
    for section, headings in _SECTION_HEADINGS.items()
}


def preprocess_cv_text(text):
    """
    Split raw CV text into named sections.

    The text is lower-cased, then each known section ('contact', 'education',
    'experience', 'skills', 'projects', 'training', 'hobbies', 'personal') is
    located by its heading keywords. Every section uses the same set of
    boundary words, so e.g. a skills section stops at a following
    "experience" heading instead of swallowing it.

    Args:
        text (str): Raw CV text
    Returns:
        dict: Maps each section found to its lower-cased body text; when a
            heading matches several times the bodies are joined with a space.
            Sections that are not found are omitted.
    """
    # Normalize text
    text = text.lower()

    matches_by_section = {"contact": _CONTACT_PATTERN.findall(text)}
    for section, pattern in _SECTION_PATTERNS.items():
        matches_by_section[section] = pattern.findall(text)

    # Each match is a (heading, body) tuple; keep only the bodies.
    processed_sections = {}
    for key, matches in matches_by_section.items():
        if matches:
            processed_sections[key] = " ".join(body.strip() for _, body in matches)
    return processed_sections
# Keyword lists matched as case-insensitive substrings of the relevant section.
_SKILL_KEYWORDS = (
    "AutoCAD", "Manufacturing Engineering", "Quality Control", "Thermal Engineering", "Heat Transfer", "Machine Design", "Fluid Mechanics", "CAD", "CAM", "Matlab", "GD&T", "Engineering Drawings", "Blueprint reading", "Product Design", "FEA Analysis",
    "Project Management", "Marketing", "Business Analysis", "Sales", "Finance", "Consulting", "Market Research",
    "Graphic Design", "Content Writing", "Digital Marketing", "UI/UX Design", "Video Production", "SEO", "Social Media Marketing",
    "Patient Care", "Medical Research", "Healthcare Administration", "Medical Technology", "Anatomy", "Physiology", "Pharmacology", "Python", "Java", "Machine Learning", "Data Science", "Cloud Computing", "Cybersecurity", "Web Development", "Software Development", "Database Management",
    "SQL", "C++", "JavaScript", "AWS", "Azure", "GCP", "Infrastructure as Code", "Networking", "Deep Learning", "Neural Networks", "TensorFlow", "PyTorch", "Computer Vision", "Natural Language Processing", "R", "Statistics", "Data Visualization", "Data Analysis", "Agile Methodologies",
    "Adobe Photoshop", "Adobe Illustrator", "Visual Communication", "Branding", "Copywriting", "Wireframing", "Prototyping", "User Research", "Interaction Design", "PPC Advertising", "Email Marketing", "Healthcare Management", "Healthcare Policy", "Healthcare Finance",
    "Medical Terminology", "Clinical Procedures", "Vital Signs", "Electronic Health Records", "Lean Manufacturing", "Six Sigma", "Production Planning", "Supply Chain Management", "ISO Standards", "Statistical Process Control", "Inspection", "Testing",
    "Requirements Gathering", "Process Improvement",
)
_EXPERIENCE_KEYWORDS = (
    "blueprints", "specifications", "production", "inspection", "testing", "measurement", "calipers",
    "gauges", "micrometers", "quality standards", "production process", "finished items", "inspection results", "test data", "training", "design", "development", "analysis", "management",
    "research", "consulting",
)
_PROJECT_KEYWORDS = ("helicopter", "assembly", "dismantling", "5S methodology", "flow path", "material", "productivity", "layout")
_TRAINING_KEYWORDS = ("inplant training",)
_QUALIFICATION_KEYWORDS = ("engineering", "diploma", "bachelor", "master", "degree", "computer science", "information technology", "business administration", "medical", "healthcare")


def _collect_terms(section_text, pos_tags, keywords=(), entity_labels=()):
    """Return raw candidate terms (entities, POS-filtered tokens, keyword hits) from one section."""
    doc = nlp(section_text)
    terms = [ent.text for ent in doc.ents if ent.label_ in entity_labels]
    terms.extend(token.text for token in doc if token.pos_ in pos_tags)
    lowered = section_text.lower()
    terms.extend(keyword for keyword in keywords if keyword.lower() in lowered)
    return terms


def _normalise_terms(terms):
    """Lower-case, de-duplicate and drop terms of two characters or fewer; sorted so output is stable."""
    return sorted({term.lower() for term in terms if len(term) > 2})


def _encode_terms(terms):
    """Embed a list of terms, or return None for an empty list so encode() is never given no input."""
    return embedding_model.encode(terms) if terms else None


def _best_similarity(reference_embeddings, cv_embeddings):
    """Return the highest pairwise cosine similarity, or 0 when the CV side has no terms."""
    if cv_embeddings is None:
        return 0
    return np.max(cosine_similarity(reference_embeddings, cv_embeddings))


def analyze_cv_skills(cv_text):
    """
    Analyze a CV and recommend career paths based on combined similarity scores.

    Terms are extracted from the skills, experience/projects/training, hobbies
    and education sections, embedded, and compared with every entry of
    CAREER_RECOMMENDATIONS. Each career's score is the weighted sum
    0.5*skills + 0.1*hobbies + 0.2*qualifications + 0.2*experience, where each
    component is the maximum cosine similarity found for that category.

    Args:
        cv_text (str): Raw CV text (sectioned internally via preprocess_cv_text)
    Returns:
        str: Markdown report listing the top five careers and the identified terms
    """
    # Preprocess CV
    cv_info = preprocess_cv_text(cv_text)

    raw_skills = []
    raw_experience = []
    raw_hobbies = []
    raw_qualifications = []

    # Skill extraction: named entities plus nouns/adjectives plus keyword hits.
    if 'skills' in cv_info:
        raw_skills = _collect_terms(
            cv_info['skills'],
            ('NOUN', 'ADJ'),
            _SKILL_KEYWORDS,
            entity_labels=('SKILL', 'ORG', 'PRODUCT'),
        )

    # Experience, projects and training all feed the same "experience" bucket.
    for section, keywords in (
        ('experience', _EXPERIENCE_KEYWORDS),
        ('projects', _PROJECT_KEYWORDS),
        ('training', _TRAINING_KEYWORDS),
    ):
        if section in cv_info:
            raw_experience.extend(
                _collect_terms(cv_info[section], ('NOUN', 'VERB'), keywords)
            )

    # Hobby extraction (no keyword list is defined for hobbies).
    if 'hobbies' in cv_info:
        raw_hobbies = _collect_terms(cv_info['hobbies'], ('NOUN', 'VERB', 'ADJ'))

    # Qualification extraction
    if 'education' in cv_info:
        raw_qualifications = _collect_terms(
            cv_info['education'], ('NOUN', 'ADJ'), _QUALIFICATION_KEYWORDS
        )

    all_skills = _normalise_terms(raw_skills)
    all_hobbies = _normalise_terms(raw_hobbies)
    all_qualifications = _normalise_terms(raw_qualifications)
    all_experience = _normalise_terms(raw_experience)

    # The CV embeddings do not depend on the career, so compute them once
    # instead of once per career inside the loop.
    cv_skill_embeddings = _encode_terms(all_skills)
    cv_hobby_embeddings = _encode_terms(all_hobbies)
    cv_qualification_embeddings = _encode_terms(all_qualifications)
    cv_experience_embeddings = _encode_terms(all_experience)

    # Calculate similarity scores for each career recommendation
    career_scores = []
    for career in CAREER_RECOMMENDATIONS:
        # Skills are compared term-by-term; the other categories are compared
        # against one embedding of the career's joined skill list.
        career_skill_embeddings = embedding_model.encode(career['skills'])
        career_profile_embedding = embedding_model.encode([", ".join(career['skills'])])

        skills_similarity = _best_similarity(career_skill_embeddings, cv_skill_embeddings)
        hobby_similarity = _best_similarity(career_profile_embedding, cv_hobby_embeddings)
        qualification_similarity = _best_similarity(career_profile_embedding, cv_qualification_embeddings)
        experience_similarity = _best_similarity(career_profile_embedding, cv_experience_embeddings)

        # Calculate weighted sum of similarities
        total_similarity = (
            (0.5 * skills_similarity)
            + (0.1 * hobby_similarity)
            + (0.2 * qualification_similarity)
            + (0.2 * experience_similarity)
        )
        career_scores.append({
            'title': career['title'],
            'description': career['description'],
            'score': total_similarity,
        })

    # Sort careers by similarity score
    ranked_careers = sorted(career_scores, key=lambda x: x['score'], reverse=True)

    # Prepare recommendation report
    report_parts = [
        "### Career Recommendation Analysis\n\n",
        "**Top Career Recommendations**:\n",
    ]
    for career in ranked_careers[:5]:  # Display top 5 recommendations
        report_parts.append(f"- **{career['title']}**\n")
        report_parts.append(f" *{career['description']}*\n")
        report_parts.append(f" *Similarity Score: {career['score']:.2f}*\n")
    # The identified terms are the same for every career, so list them once.
    report_parts.append("\n**Skills Match**:\n")
    report_parts.append("- Identified Skills: " + ", ".join(all_skills) + "\n\n")
    report_parts.append("**Hobbies Match**:\n")
    report_parts.append("- Identified Hobbies: " + ", ".join(all_hobbies) + "\n\n")
    report_parts.append("**Qualification Match**:\n")
    report_parts.append("- Identified Qualifications: " + ", ".join(all_qualifications) + "\n\n")
    report_parts.append("**Experience Match**:\n")
    report_parts.append("- Identified Experience: " + ", ".join(all_experience) + "\n\n")
    return "".join(report_parts)
def cv_skill_assessment(cv_file):
    """
    Process an uploaded CV and return the skill assessment report.

    The file is first read as a PDF; if that yields no text it is re-read as
    a UTF-8 text file. Failures are reported as a message rather than raised,
    because the return value is rendered directly in the UI.

    Args:
        cv_file (str): Path to uploaded CV file (None/empty when nothing was uploaded)
    Returns:
        str: Skill assessment and career recommendations, or an
            "Error processing CV: ..." message
    """
    # Without this guard a missing upload surfaces as an opaque TypeError text.
    if not cv_file:
        return "Error processing CV: no file was uploaded."
    try:
        # Extract text from PDF
        cv_text = extract_text_from_pdf(cv_file)
        # If PDF extraction fails, try direct text processing
        if not cv_text.strip():
            with open(cv_file, 'r', encoding='utf-8') as f:
                cv_text = f.read()
        # Scoring an empty document would produce a meaningless all-zero report.
        if not cv_text.strip():
            return "Error processing CV: no readable text found in the uploaded file."
        # Analyze CV and get recommendations
        return analyze_cv_skills(cv_text)
    except Exception as e:
        return f"Error processing CV: {str(e)}"
# Create Gradio Interface
def launch_cv_skill_assessment_app():
    """
    Build and launch the CV Skill Assessment Gradio interface.

    The interface takes one uploaded file (passed to cv_skill_assessment as a
    file path) and renders the returned report as Markdown. `launch` is
    called with debug=True.
    """
    demo = gr.Interface(
        fn=cv_skill_assessment,
        inputs=gr.File(label="Upload Your CV (PDF/Text)", type="filepath"),
        outputs=gr.Markdown(label="Career Recommendation Report"),
        # The title previously contained the mis-decoded bytes of the rocket
        # emoji; the intended character is restored here.
        title="🚀 CV Skills Assessment AI",
        description="""
Discover your ideal career path based on your CV!
*How to use*:
1. Upload your CV (PDF or Text file)
2. Our AI analyzes your skills, experience, and background
3. Receive personalized career recommendations
*Features*:
- Advanced CV parsing
- Skill extraction
- Domain-based career matching
- Detailed recommendation report
""",
        theme="huggingface"
    )
    demo.launch(debug=True)
# Run the application: build the Gradio interface and start serving it.
# NOTE: this call is unguarded (no `if __name__ == "__main__":`), so it runs
# whenever this module is executed or imported.
launch_cv_skill_assessment_app()