Spaces:
Build error
Build error
import gradio as gr | |
import torch | |
import spacy | |
import nltk | |
import re | |
import PyPDF2 | |
import numpy as np | |
import pandas as pd | |
from transformers import pipeline | |
from sentence_transformers import SentenceTransformer | |
from sklearn.metrics.pairwise import cosine_similarity | |
# Fetch the NLTK 'punkt' tokenizer data at import time.
# NOTE(review): no NLTK tokenizer call is visible in this file - confirm the
# download is still required.
nltk.download('punkt')
# Load the spaCy English pipeline (its POS tags and entities drive term
# extraction) and the sentence-embedding model (used for similarity scoring).
# Both are loaded once, at import time.
nlp = spacy.load('en_core_web_sm')
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Report whether CUDA is available. In the code visible here `device` is only
# printed; it is not passed to either model.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on: {device}")
# Career database: one entry per career, each with a display title, a one-line
# description and the skill phrases used for similarity matching.
CAREER_RECOMMENDATIONS = [
    {
        "title": "Software Engineer",
        "description": "Develops software applications and systems",
        "skills": [
            "Python", "Java", "C++", "JavaScript", "Software Development",
            "Database Management", "Web Development", "Cloud Computing",
            "Data Structures", "Algorithms",
        ],
    },
    {
        "title": "Data Scientist",
        "description": "Analyzes complex data to help make business decisions",
        "skills": [
            "Python", "R", "Statistics", "Machine Learning",
            "Data Visualization", "Data Analysis", "SQL",
        ],
    },
    {
        "title": "Cloud Solutions Architect",
        "description": "Designs and manages cloud computing strategies",
        "skills": [
            "Cloud Computing", "AWS", "Azure", "GCP",
            "Infrastructure as Code", "Networking",
        ],
    },
    {
        "title": "AI/ML Engineer",
        "description": "Creates intelligent systems and machine learning models",
        "skills": [
            "Machine Learning", "Deep Learning", "Neural Networks",
            "TensorFlow", "PyTorch", "Computer Vision",
            "Natural Language Processing",
        ],
    },
    {
        "title": "Database Administrator",
        "description": "Manage databases, ensure data security",
        "skills": [
            "SQL", "Database Management", "Database Security",
            "Database Design", "Database Modeling",
        ],
    },
    {
        "title": "Mechanical Engineer",
        "description": "Designs, develops, and tests mechanical devices and systems",
        "skills": [
            "CAD", "CAM", "Matlab", "Mechanical Design",
            "Manufacturing Engineering", "Quality Control",
            "Thermal Engineering", "Fluid Mechanics", "GD&T",
            "Engineering Drawings", "Blueprint reading", "Product Design",
            "FEA Analysis",
        ],
    },
    {
        "title": "Manufacturing Engineer",
        "description": "Optimizes manufacturing processes for efficiency and quality",
        "skills": [
            "Manufacturing Engineering", "Process Optimization",
            "Lean Manufacturing", "Six Sigma", "Production Planning",
            "Supply Chain Management",
        ],
    },
    {
        "title": "Quality Engineer",
        "description": "Oversees quality assurance activities and ensures products meet standards.",
        "skills": [
            "Quality Control", "Quality Assurance", "ISO Standards",
            "Statistical Process Control", "Inspection", "Testing",
        ],
    },
    {
        "title": "Design Engineer",
        "description": "Creates product designs and technical drawings using CAD software",
        "skills": [
            "CAD", "CAM", "Product Design", "3D Modeling",
            "Engineering Design", "Drafting",
        ],
    },
    {
        "title": "Business Analyst",
        "description": "Identifies business needs and determines solutions",
        "skills": [
            "Business Analysis", "Requirements Gathering", "Data Analysis",
            "Process Improvement", "Project Management",
        ],
    },
    {
        "title": "Marketing Manager",
        "description": "Develops and implements marketing strategies",
        "skills": [
            "Marketing", "Digital Marketing", "Social Media Marketing",
            "Market Research", "Branding", "Advertising", "Content Marketing",
        ],
    },
    {
        "title": "Project Manager",
        "description": "Leads and coordinates project teams and resources",
        "skills": [
            "Project Management", "Project Planning", "Risk Management",
            "Team Management", "Agile Methodologies",
        ],
    },
    {
        "title": "Management Consultant",
        "description": "Advises organizations on improving performance",
        "skills": [
            "Consulting", "Strategy", "Problem Solving", "Business Analysis",
            "Communication",
        ],
    },
    {
        "title": "Graphic Designer",
        "description": "Creates visual concepts using computer software or by hand",
        "skills": [
            "Graphic Design", "Adobe Photoshop", "Adobe Illustrator",
            "UI/UX Design", "Visual Communication", "Branding",
        ],
    },
    {
        "title": "Content Strategist",
        "description": "Develops content plans and marketing strategies",
        "skills": [
            "Content Writing", "Content Strategy", "SEO", "Content Marketing",
            "Copywriting",
        ],
    },
    {
        "title": "UI/UX Designer",
        "description": "Designs user interfaces for digital products",
        "skills": [
            "UI Design", "UX Design", "Wireframing", "Prototyping",
            "User Research", "Interaction Design",
        ],
    },
    {
        "title": "Digital Marketing Specialist",
        "description": "Promotes brands and products through digital channels",
        "skills": [
            "Digital Marketing", "Social Media Marketing", "SEO",
            "PPC Advertising", "Email Marketing", "Content Marketing",
        ],
    },
    {
        "title": "Healthcare Administrator",
        "description": "Manages healthcare facilities and services",
        "skills": [
            "Healthcare Administration", "Healthcare Management",
            "Healthcare Policy", "Healthcare Finance", "Patient Care",
        ],
    },
    {
        "title": "Medical Researcher",
        "description": "Conducts research to improve medical knowledge",
        "skills": [
            "Medical Research", "Data Analysis", "Research Design",
            "Laboratory Techniques", "Scientific Writing",
        ],
    },
    {
        "title": "Healthcare Consultant",
        "description": "Advises healthcare organizations on improvement strategies",
        "skills": [
            "Healthcare Consulting", "Healthcare Strategy",
            "Healthcare Operations", "Healthcare Policy",
        ],
    },
    {
        "title": "Medical Assistant",
        "description": "Assists with patient care and medical administrative tasks.",
        "skills": [
            "Patient Care", "Medical Terminology", "Medical Assisting",
            "Clinical Procedures", "Vital Signs", "Electronic Health Records",
        ],
    },
]
def extract_text_from_pdf(file_path):
    """
    Extract text from a PDF file.

    Args:
        file_path (str): Path to the PDF file
    Returns:
        str: The text of every page, each page followed by a newline.
        Returns "" when the file cannot be opened or parsed; the error is
        printed rather than raised.
    """
    try:
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Treat a page that yields no text (None or '') as empty, so a
            # single text-less page cannot raise on concatenation and throw
            # away the text of every other page.
            return ''.join(
                f"{page.extract_text() or ''}\n" for page in reader.pages
            )
    except Exception as e:
        # Deliberately best-effort: "" signals that no PDF text was obtained.
        print(f"Error extracting PDF text: {e}")
        return ""
def preprocess_cv_text(text):
    """
    Split raw CV text into named sections.

    The text is lower-cased, then each section is located by its header
    keyword. A section body runs until the first blank line, the first line
    that starts with another section's header, or the end of the text.

    Args:
        text (str): Raw CV text
    Returns:
        dict: Maps a section name ('contact', 'education', 'experience',
        'skills', 'projects', 'training', 'hobbies', 'personal') to the
        space-joined, stripped bodies of all its matches. Sections with no
        match are omitted.
    """
    # Normalize text
    text = text.lower()

    # section name -> (header alternation, headers that terminate the body)
    section_specs = {
        'education': ('education|qualification|academic',
                      'work|experience|skills|projects|training|hobbies|personal|declaration'),
        'experience': ('experience|work',
                       'education|skills|projects|training|hobbies|personal|declaration'),
        'skills': ('skills|expertise|technical skills',
                   'education|work|projects|training|hobbies|personal|declaration'),
        'projects': ('projects',
                     'education|work|skills|training|hobbies|personal|declaration'),
        'training': ('training|certification',
                     'education|work|skills|projects|hobbies|personal|declaration'),
        'hobbies': ('hobbies|interests',
                    'education|work|skills|projects|training|personal|declaration'),
        'personal': ('personal details',
                     'education|work|skills|projects|training|hobbies|declaration'),
    }

    # Contact details are single-line: everything after the keyword up to the newline.
    matches_by_section = {
        'contact': re.findall(r'(email|phone|contact)[:\s]*([^\n]+)', text),
    }
    for name, (headers, stop_headers) in section_specs.items():
        # `\Z` lets the final section of the document match: without it a
        # section not followed by a blank line or another header is dropped.
        pattern = rf'({headers})[:\s]*(.*?)(?=\n\n|\n(?:{stop_headers})|\Z)'
        matches_by_section[name] = re.findall(pattern, text, re.DOTALL | re.IGNORECASE)

    # Each match is a (header, body) tuple; combine all bodies of a section
    # into one string.
    return {
        name: " ".join(body.strip() for _header, body in matches)
        for name, matches in matches_by_section.items()
        if matches
    }
# Keyword vocabularies. Each keyword is matched case-insensitively as a
# substring of the corresponding CV section text.
_SKILL_KEYWORDS = [
    "AutoCAD", "Manufacturing Engineering", "Quality Control", "Thermal Engineering", "Heat Transfer",
    "Machine Design", "Fluid Mechanics", "CAD", "CAM", "Matlab", "GD&T", "Engineering Drawings",
    "Blueprint reading", "Product Design", "FEA Analysis",
    "Project Management", "Marketing", "Business Analysis", "Sales", "Finance", "Consulting", "Market Research",
    "Graphic Design", "Content Writing", "Digital Marketing", "UI/UX Design", "Video Production", "SEO",
    "Social Media Marketing",
    "Patient Care", "Medical Research", "Healthcare Administration", "Medical Technology", "Anatomy",
    "Physiology", "Pharmacology", "Python", "Java", "Machine Learning", "Data Science", "Cloud Computing",
    "Cybersecurity", "Web Development", "Software Development", "Database Management",
    "SQL", "C++", "JavaScript", "AWS", "Azure", "GCP", "Infrastructure as Code", "Networking",
    "Deep Learning", "Neural Networks", "TensorFlow", "PyTorch", "Computer Vision",
    "Natural Language Processing", "R", "Statistics", "Data Visualization", "Data Analysis",
    "Agile Methodologies",
    "Adobe Photoshop", "Adobe Illustrator", "Visual Communication", "Branding", "Copywriting",
    "Wireframing", "Prototyping", "User Research", "Interaction Design", "PPC Advertising",
    "Email Marketing", "Healthcare Management", "Healthcare Policy", "Healthcare Finance",
    "Medical Terminology", "Clinical Procedures", "Vital Signs", "Electronic Health Records",
    "Lean Manufacturing", "Six Sigma", "Production Planning", "Supply Chain Management",
    "ISO Standards", "Statistical Process Control", "Inspection", "Testing",
    "Requirements Gathering", "Process Improvement",
]
_EXPERIENCE_KEYWORDS = [
    "blueprints", "specifications", "production", "inspection", "testing", "measurement", "calipers",
    "gauges", "micrometers", "quality standards", "production process", "finished items",
    "inspection results", "test data", "training", "design", "development", "analysis", "management",
    "research", "consulting",
]
_PROJECT_KEYWORDS = [
    "helicopter", "assembly", "dismantling", "5S methodology", "flow path", "material",
    "productivity", "layout",
]
_TRAINING_KEYWORDS = ["inplant training"]
_QUALIFICATION_KEYWORDS = [
    "engineering", "diploma", "bachelor", "master", "degree", "computer science",
    "information technology", "business administration", "medical", "healthcare",
]


def _extract_terms(section_text, pos_tags, keywords=(), entity_labels=()):
    """Collect candidate terms from one CV section.

    Gathers spaCy entities whose label is in `entity_labels`, tokens whose
    part-of-speech tag is in `pos_tags`, and every keyword that occurs
    (case-insensitively) as a substring of `section_text`.
    """
    doc = nlp(section_text)
    terms = [ent.text for ent in doc.ents if ent.label_ in entity_labels]
    terms.extend(token.text for token in doc if token.pos_ in pos_tags)
    lowered_text = section_text.lower()
    terms.extend(keyword for keyword in keywords if keyword.lower() in lowered_text)
    return terms


def _normalize_terms(terms):
    """Lower-case, drop terms of two characters or fewer, de-duplicate and sort."""
    return sorted({term.lower() for term in terms if len(term) > 2})


def _best_similarity(reference_embeddings, cv_embeddings):
    """Return the highest pairwise cosine similarity, or 0 when there are no CV embeddings."""
    if cv_embeddings is None:
        return 0
    return np.max(cosine_similarity(reference_embeddings, cv_embeddings))


def analyze_cv_skills(cv_text):
    """
    Analyze a CV and recommend career paths based on combined similarity scores.

    The CV is split into sections with `preprocess_cv_text`; candidate terms
    are extracted per section, embedded, and compared with each entry of
    `CAREER_RECOMMENDATIONS`. The score of a career is the weighted sum
    0.5*skills + 0.1*hobbies + 0.2*qualifications + 0.2*experience, where each
    component is the maximum cosine similarity found (0 when the CV yielded
    no terms for that component).

    Args:
        cv_text (str): Raw CV text
    Returns:
        str: Markdown report listing the five highest-scoring careers and the
        terms identified in the CV (terms are listed in sorted order).
    """
    # Preprocess CV
    cv_info = preprocess_cv_text(cv_text)

    raw_skills = []
    raw_hobbies = []
    raw_qualifications = []
    raw_experience = []

    # Skills: entities (ORG / PRODUCT / SKILL labels) plus nouns, adjectives and keywords.
    if 'skills' in cv_info:
        raw_skills = _extract_terms(
            cv_info['skills'], ('NOUN', 'ADJ'), _SKILL_KEYWORDS, ('SKILL', 'ORG', 'PRODUCT')
        )

    # Experience, projects and training all feed the same "experience" pool.
    experience_sources = (
        ('experience', _EXPERIENCE_KEYWORDS),
        ('projects', _PROJECT_KEYWORDS),
        ('training', _TRAINING_KEYWORDS),
    )
    for section, keywords in experience_sources:
        if section in cv_info:
            raw_experience.extend(_extract_terms(cv_info[section], ('NOUN', 'VERB'), keywords))

    # Hobbies: nouns, verbs and adjectives only; no keyword list.
    if 'hobbies' in cv_info:
        raw_hobbies = _extract_terms(cv_info['hobbies'], ('NOUN', 'VERB', 'ADJ'))

    # Qualifications: nouns, adjectives and keywords from the education section.
    if 'education' in cv_info:
        raw_qualifications = _extract_terms(
            cv_info['education'], ('NOUN', 'ADJ'), _QUALIFICATION_KEYWORDS
        )

    # Remove duplicates and convert to lowercase. The length filter also drops
    # one- and two-character terms (so the keyword "R" never survives).
    # Sorting keeps the report stable from run to run.
    all_skills = _normalize_terms(raw_skills)
    all_hobbies = _normalize_terms(raw_hobbies)
    all_qualifications = _normalize_terms(raw_qualifications)
    all_experience = _normalize_terms(raw_experience)

    # The CV-side embeddings do not depend on the career, so encode each list
    # once, and skip encoding entirely for empty lists.
    def encode_or_none(terms):
        return embedding_model.encode(terms) if terms else None

    cv_skill_embeddings = encode_or_none(all_skills)
    cv_hobby_embeddings = encode_or_none(all_hobbies)
    cv_qualification_embeddings = encode_or_none(all_qualifications)
    cv_experience_embeddings = encode_or_none(all_experience)

    # Calculate similarity scores for each career recommendation
    career_scores = []
    for career in CAREER_RECOMMENDATIONS:
        # Skills are compared term-by-term against each individual career skill.
        career_skill_embeddings = embedding_model.encode(career['skills'])
        # The other components are compared with one embedding of the whole
        # comma-joined skill list.
        career_profile_embedding = embedding_model.encode([", ".join(career['skills'])])

        skills_similarity = _best_similarity(career_skill_embeddings, cv_skill_embeddings)
        hobby_similarity = _best_similarity(career_profile_embedding, cv_hobby_embeddings)
        qualification_similarity = _best_similarity(
            career_profile_embedding, cv_qualification_embeddings
        )
        experience_similarity = _best_similarity(
            career_profile_embedding, cv_experience_embeddings
        )

        # Calculate weighted sum of similarities
        total_similarity = (
            (0.5 * skills_similarity)
            + (0.1 * hobby_similarity)
            + (0.2 * qualification_similarity)
            + (0.2 * experience_similarity)
        )
        career_scores.append({
            'title': career['title'],
            'description': career['description'],
            'score': total_similarity,
        })

    # Sort careers by similarity score
    ranked_careers = sorted(career_scores, key=lambda x: x['score'], reverse=True)

    # Prepare recommendation report
    report_parts = [
        "### Career Recommendation Analysis\n\n",
        "**Top Career Recommendations**:\n",
    ]
    for career in ranked_careers[:5]:  # Display top 5 recommendations
        report_parts.append(f"- **{career['title']}**\n")
        report_parts.append(f" *{career['description']}*\n")
        report_parts.append(f" *Similarity Score: {career['score']:.2f}*\n")
    report_parts.extend([
        "\n**Skills Match**:\n",
        "- Identified Skills: " + ", ".join(all_skills) + "\n\n",
        "**Hobbies Match**:\n",
        "- Identified Hobbies: " + ", ".join(all_hobbies) + "\n\n",
        "**Qualification Match**:\n",
        "- Identified Qualifications: " + ", ".join(all_qualifications) + "\n\n",
        "**Experience Match**:\n",
        "- Identified Experience: " + ", ".join(all_experience) + "\n\n",
    ])
    return "".join(report_parts)
def cv_skill_assessment(cv_file):
    """
    Main function to process an uploaded CV and provide a skill assessment.

    The file is first parsed as a PDF; if that yields no text it is read as a
    UTF-8 text file instead. Failures are reported in the returned string
    rather than raised, so the UI always has something to display.

    Args:
        cv_file (str): Path to uploaded CV file (falsy, e.g. None, when
            nothing was uploaded)
    Returns:
        str: Skill assessment and career recommendations, or a message
        starting with "Error processing CV:" when the CV cannot be processed.
    """
    # Nothing to open: answer directly instead of surfacing a TypeError.
    if not cv_file:
        return "Error processing CV: no file was uploaded."
    try:
        # Extract text from PDF
        cv_text = extract_text_from_pdf(cv_file)
        # If PDF extraction fails, try direct text processing
        if not cv_text.strip():
            try:
                with open(cv_file, 'r', encoding='utf-8') as f:
                    cv_text = f.read()
            except UnicodeDecodeError:
                # Not valid UTF-8 text either (e.g. a binary or image-only file).
                cv_text = ""
        # Without any text the analysis would only produce an all-zero report.
        if not cv_text.strip():
            return "Error processing CV: no readable text could be extracted from the file."
        # Analyze CV and get recommendations
        return analyze_cv_skills(cv_text)
    except Exception as e:
        return f"Error processing CV: {str(e)}"
# Create Gradio Interface
def launch_cv_skill_assessment_app():
    """
    Build the Gradio interface around `cv_skill_assessment` and launch it.

    The interface takes one uploaded file (handed to the callback as a file
    path) and renders the returned report as Markdown. The app is launched
    with debug mode enabled.
    """
    # Markdown shown under the title; assembled line by line so the text stays
    # flush-left regardless of code indentation.
    app_description = (
        "\n"
        "Discover your ideal career path based on your CV!\n"
        "*How to use*:\n"
        "1. Upload your CV (PDF or Text file)\n"
        "2. Our AI analyzes your skills, experience, and background\n"
        "3. Receive personalized career recommendations\n"
        "*Features*:\n"
        "- Advanced CV parsing\n"
        "- Skill extraction\n"
        "- Domain-based career matching\n"
        "- Detailed recommendation report\n"
    )
    cv_upload = gr.File(label="Upload Your CV (PDF/Text)", type="filepath")
    report_view = gr.Markdown(label="Career Recommendation Report")
    demo = gr.Interface(
        fn=cv_skill_assessment,
        inputs=cv_upload,
        outputs=report_view,
        title="π CV Skills Assessment AI",
        description=app_description,
        theme="huggingface",
    )
    demo.launch(debug=True)
# Run the application only when this file is executed as a script, so that
# importing the module (for example from a test) does not start the web server.
if __name__ == "__main__":
    launch_cv_skill_assessment_app()