Spaces:

Anupam251272
/

Cv-Skill-Assessment-Ai

Build error

App Files Files Community

Cv-Skill-Assessment-Ai / app.py

Anupam251272

Update app.py

081374c verified 5 days ago

raw

history blame

18.5 kB

	import gradio as gr
	import torch
	import spacy
	import nltk
	import re
	import PyPDF2
	import numpy as np
	import pandas as pd
	from transformers import pipeline
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity

	# Download necessary NLTK resources
	# NOTE(review): this runs at import time on every start-up; no other use of
	# nltk is visible in this file, so the download may be unnecessary -- confirm.
	nltk.download('punkt')

	# Load spaCy and Sentence Transformer models
	# Both models are loaded once, at import, and reused by analyze_cv_skills.
	nlp = spacy.load('en_core_web_sm')
	embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

	# Check for GPU availability
	# NOTE(review): `device` is only printed here; the visible code never passes
	# it to either model.
	device = "cuda" if torch.cuda.is_available() else "cpu"
	print(f"Running on: {device}")

	# Updated career database
	# Each entry is a dict with three keys:
	#   "title"       - career name shown in the report
	#   "description" - one-line summary shown under the title
	#   "skills"      - list of skill names that are embedded and compared with
	#                   the terms extracted from the CV
	CAREER_RECOMMENDATIONS = [
	{"title": "Software Engineer", "description": "Develops software applications and systems", "skills":["Python","Java","C++","JavaScript", "Software Development","Database Management","Web Development", "Cloud Computing","Data Structures", "Algorithms"]},
	{"title": "Data Scientist", "description": "Analyzes complex data to help make business decisions","skills": ["Python","R","Statistics","Machine Learning","Data Visualization","Data Analysis","SQL"]},
	{"title": "Cloud Solutions Architect", "description": "Designs and manages cloud computing strategies","skills":["Cloud Computing","AWS","Azure","GCP","Infrastructure as Code","Networking"]},
	{"title": "AI/ML Engineer", "description": "Creates intelligent systems and machine learning models","skills": ["Machine Learning", "Deep Learning", "Neural Networks", "TensorFlow", "PyTorch","Computer Vision","Natural Language Processing"]},
	{"title":"Database Administrator","description":"Manage databases, ensure data security","skills":["SQL", "Database Management", "Database Security", "Database Design","Database Modeling"]},
	{"title": "Mechanical Engineer", "description": "Designs, develops, and tests mechanical devices and systems","skills": ["CAD","CAM","Matlab","Mechanical Design", "Manufacturing Engineering", "Quality Control", "Thermal Engineering", "Fluid Mechanics", "GD&T","Engineering Drawings","Blueprint reading","Product Design","FEA Analysis"]},
	{"title": "Manufacturing Engineer", "description": "Optimizes manufacturing processes for efficiency and quality","skills": ["Manufacturing Engineering","Process Optimization","Lean Manufacturing","Six Sigma","Production Planning","Supply Chain Management"]},
	{"title":"Quality Engineer","description":"Oversees quality assurance activities and ensures products meet standards.","skills":["Quality Control","Quality Assurance","ISO Standards","Statistical Process Control","Inspection","Testing"]},
	{"title": "Design Engineer", "description": "Creates product designs and technical drawings using CAD software","skills": ["CAD","CAM","Product Design","3D Modeling","Engineering Design","Drafting"]},
	{"title": "Business Analyst", "description": "Identifies business needs and determines solutions","skills": ["Business Analysis", "Requirements Gathering", "Data Analysis", "Process Improvement", "Project Management"]},
	{"title": "Marketing Manager", "description": "Develops and implements marketing strategies","skills":["Marketing","Digital Marketing","Social Media Marketing","Market Research","Branding","Advertising", "Content Marketing"]},
	{"title": "Project Manager", "description": "Leads and coordinates project teams and resources","skills":["Project Management","Project Planning","Risk Management","Team Management","Agile Methodologies"]},
	{"title": "Management Consultant", "description": "Advises organizations on improving performance","skills":["Consulting","Strategy","Problem Solving","Business Analysis","Communication"]},
	{"title": "Graphic Designer", "description": "Creates visual concepts using computer software or by hand","skills": ["Graphic Design","Adobe Photoshop","Adobe Illustrator","UI/UX Design","Visual Communication","Branding"]},
	{"title": "Content Strategist", "description": "Develops content plans and marketing strategies","skills":["Content Writing","Content Strategy","SEO","Content Marketing","Copywriting"]},
	{"title": "UI/UX Designer", "description": "Designs user interfaces for digital products","skills":["UI Design","UX Design","Wireframing","Prototyping","User Research","Interaction Design"]},
	{"title": "Digital Marketing Specialist", "description": "Promotes brands and products through digital channels","skills":["Digital Marketing","Social Media Marketing","SEO","PPC Advertising","Email Marketing","Content Marketing"]},
	{"title": "Healthcare Administrator", "description": "Manages healthcare facilities and services","skills":["Healthcare Administration","Healthcare Management","Healthcare Policy","Healthcare Finance","Patient Care"]},
	{"title": "Medical Researcher", "description": "Conducts research to improve medical knowledge","skills":["Medical Research","Data Analysis","Research Design","Laboratory Techniques","Scientific Writing"]},
	{"title": "Healthcare Consultant", "description": "Advises healthcare organizations on improvement strategies","skills":["Healthcare Consulting", "Healthcare Strategy","Healthcare Operations","Healthcare Policy"]},
	{"title":"Medical Assistant","description": "Assists with patient care and medical administrative tasks.","skills":["Patient Care","Medical Terminology","Medical Assisting","Clinical Procedures","Vital Signs","Electronic Health Records"]}
	]

	def extract_text_from_pdf(file_path):
	    """
	    Extract the text of every page of a PDF file.

	    Args:
	        file_path (str): Path to the PDF file

	    Returns:
	        str: Text of all pages, each followed by a newline. An empty string
	        is returned (and the error printed) if the file cannot be opened or
	        parsed, so the caller can fall back to plain-text reading.
	    """
	    try:
	        with open(file_path, 'rb') as file:
	            reader = PyPDF2.PdfReader(file)
	            # A page with no extractable text may yield None/empty; treat it
	            # as empty instead of letting one page discard the whole document.
	            return ''.join(
	                (page.extract_text() or '') + '\n' for page in reader.pages
	            )
	    except Exception as e:
	        # Deliberate best-effort boundary: any failure degrades to "".
	        print(f"Error extracting PDF text: {e}")
	        return ""

	def preprocess_cv_text(text):
	    """
	    Split raw CV text into named sections.

	    The text is lower-cased, then each section is located by a header word
	    followed by a single ':' or whitespace character. A section's body runs
	    until the first blank line, the next recognised header at the start of a
	    line, or the end of the text.

	    Args:
	        text (str): Raw CV text

	    Returns:
	        dict: Maps section name ('contact', 'education', 'experience',
	        'skills', 'projects', 'training', 'hobbies', 'personal') to the
	        stripped bodies of all matches joined by a single space. Sections
	        that are not found are omitted.
	    """
	    # Normalize text
	    text = text.lower()

	    # section name -> (header alternatives, headers that terminate the section)
	    section_specs = {
	        'education': ('education|qualification|academic',
	                      'work|experience|skills|projects|training|hobbies|personal|declaration'),
	        'experience': ('experience|work',
	                       'education|skills|projects|training|hobbies|personal|declaration'),
	        'skills': ('skills|expertise|technical skills',
	                   'education|work|projects|training|hobbies|personal|declaration'),
	        'projects': ('projects',
	                     'education|work|skills|training|hobbies|personal|declaration'),
	        'training': ('training|certification',
	                     'education|work|skills|projects|hobbies|personal|declaration'),
	        'hobbies': ('hobbies|interests',
	                    'education|work|skills|projects|training|personal|declaration'),
	        'personal': ('personal details',
	                     'education|work|skills|projects|training|hobbies|declaration'),
	    }
	    flags = re.DOTALL | re.IGNORECASE

	    # Contact details are single-line: header, optional separators, rest of line.
	    sections = {
	        'contact': re.findall(r'(email|phone|contact)[:\s]*([^\n]+)', text),
	    }
	    for name, (headers, stops) in section_specs.items():
	        # Lazy body capture; \Z lets the last section of the CV match even
	        # when no blank line or further header follows it.
	        pattern = rf'({headers})[:\s](.*?)(?=\n\n|\n(?:{stops})|\Z)'
	        sections[name] = re.findall(pattern, text, flags)

	    # Each match is a (header, body) tuple; keep the bodies of non-empty sections.
	    processed_sections = {}
	    for key, matches in sections.items():
	        if matches:
	            processed_sections[key] = " ".join(match[1].strip() for match in matches)

	    return processed_sections

	# Keyword lists matched as case-insensitive substrings of the relevant CV section.
	_SKILL_KEYWORDS = (
	    "AutoCAD", "Manufacturing Engineering", "Quality Control", "Thermal Engineering", "Heat Transfer", "Machine Design", "Fluid Mechanics", "CAD", "CAM", "Matlab", "GD&T", "Engineering Drawings", "Blueprint reading", "Product Design", "FEA Analysis",
	    "Project Management", "Marketing", "Business Analysis", "Sales", "Finance", "Consulting", "Market Research",
	    "Graphic Design", "Content Writing", "Digital Marketing", "UI/UX Design", "Video Production", "SEO", "Social Media Marketing",
	    "Patient Care", "Medical Research", "Healthcare Administration", "Medical Technology", "Anatomy", "Physiology", "Pharmacology", "Python", "Java", "Machine Learning", "Data Science", "Cloud Computing", "Cybersecurity", "Web Development", "Software Development", "Database Management",
	    "SQL", "C++", "JavaScript", "AWS", "Azure", "GCP", "Infrastructure as Code", "Networking", "Deep Learning", "Neural Networks", "TensorFlow", "PyTorch", "Computer Vision", "Natural Language Processing", "R", "Statistics", "Data Visualization", "Data Analysis", "Agile Methodologies",
	    "Adobe Photoshop", "Adobe Illustrator", "Visual Communication", "Branding", "Copywriting", "Wireframing", "Prototyping", "User Research", "Interaction Design", "PPC Advertising", "Email Marketing", "Healthcare Management", "Healthcare Policy", "Healthcare Finance",
	    "Medical Terminology", "Clinical Procedures", "Vital Signs", "Electronic Health Records", "Lean Manufacturing", "Six Sigma", "Production Planning", "Supply Chain Management", "ISO Standards", "Statistical Process Control", "Inspection", "Testing",
	    "Requirements Gathering", "Process Improvement",
	)
	_EXPERIENCE_KEYWORDS = (
	    "blueprints", "specifications", "production", "inspection", "testing", "measurement", "calipers",
	    "gauges", "micrometers", "quality standards", "production process", "finished items", "inspection results", "test data", "training", "design", "development", "analysis", "management",
	    "research", "consulting",
	)
	_PROJECT_KEYWORDS = ("helicopter", "assembly", "dismantling", "5S methodology", "flow path", "material", "productivity", "layout")
	_TRAINING_KEYWORDS = ("inplant training",)
	_QUALIFICATION_KEYWORDS = ("engineering", "diploma", "bachelor", "master", "degree", "computer science", "information technology", "business administration", "medical", "healthcare")


	def _extract_terms(text, pos_tags, keywords=(), entity_labels=()):
	    """Return entities, POS-filtered tokens and matching keywords found in text."""
	    doc = nlp(text)
	    terms = [ent.text for ent in doc.ents if ent.label_ in entity_labels]
	    terms.extend(token.text for token in doc if token.pos_ in pos_tags)
	    lowered = text.lower()
	    terms.extend(keyword for keyword in keywords if keyword.lower() in lowered)
	    return terms


	def _normalize_terms(terms):
	    """Lower-case, drop terms of two characters or fewer, de-duplicate and sort."""
	    return sorted({term.lower() for term in terms if len(term) > 2})


	def _encode_terms(terms):
	    """Embed a list of terms, or return None when there is nothing to embed."""
	    return embedding_model.encode(terms) if terms else None


	def _max_similarity(reference_embeddings, candidate_embeddings):
	    """Return the highest pairwise cosine similarity, or 0 without candidates."""
	    if candidate_embeddings is None or len(candidate_embeddings) == 0:
	        return 0
	    return np.max(cosine_similarity(reference_embeddings, candidate_embeddings))


	def analyze_cv_skills(cv_text):
	    """
	    Analyze a CV and rank career paths by a weighted similarity score.

	    Terms are extracted from the CV's skills, experience, projects,
	    training, hobbies and education sections, embedded, and compared with
	    each career's skill list. The per-career score is
	    0.5*skills + 0.1*hobbies + 0.2*qualifications + 0.2*experience, where
	    each component is the maximum cosine similarity found (0 when the CV
	    section yielded no terms).

	    Args:
	        cv_text (str): Raw CV text

	    Returns:
	        str: Markdown report listing the top five careers with their scores,
	        followed by the skills, hobbies, qualifications and experience terms
	        identified in the CV.
	    """
	    # Preprocess CV
	    cv_info = preprocess_cv_text(cv_text)

	    all_skills = []
	    all_hobbies = []
	    all_qualifications = []
	    all_experience = []

	    # Skill extraction: entities (ORG/PRODUCT/SKILL), nouns/adjectives, keywords
	    if 'skills' in cv_info:
	        all_skills.extend(_extract_terms(
	            cv_info['skills'], ('NOUN', 'ADJ'), _SKILL_KEYWORDS,
	            entity_labels=('SKILL', 'ORG', 'PRODUCT'),
	        ))

	    # Experience, projects and training all feed the experience term list
	    if 'experience' in cv_info:
	        all_experience.extend(_extract_terms(
	            cv_info['experience'], ('NOUN', 'VERB'), _EXPERIENCE_KEYWORDS))
	    if 'projects' in cv_info:
	        all_experience.extend(_extract_terms(
	            cv_info['projects'], ('NOUN', 'VERB'), _PROJECT_KEYWORDS))
	    if 'training' in cv_info:
	        all_experience.extend(_extract_terms(
	            cv_info['training'], ('NOUN', 'VERB'), _TRAINING_KEYWORDS))

	    # Hobby extraction
	    if 'hobbies' in cv_info:
	        all_hobbies.extend(_extract_terms(
	            cv_info['hobbies'], ('NOUN', 'VERB', 'ADJ')))

	    # Qualification extraction
	    if 'education' in cv_info:
	        all_qualifications.extend(_extract_terms(
	            cv_info['education'], ('NOUN', 'ADJ'), _QUALIFICATION_KEYWORDS))

	    # Remove duplicates and convert to lowercase (sorted for a stable report)
	    all_skills = _normalize_terms(all_skills)
	    all_hobbies = _normalize_terms(all_hobbies)
	    all_qualifications = _normalize_terms(all_qualifications)
	    all_experience = _normalize_terms(all_experience)

	    # The CV embeddings do not depend on the career, so compute them once.
	    cv_skill_embeddings = _encode_terms(all_skills)
	    cv_hobby_embeddings = _encode_terms(all_hobbies)
	    cv_qualifications_embeddings = _encode_terms(all_qualifications)
	    cv_experience_embeddings = _encode_terms(all_experience)

	    # Calculate similarity scores for each career recommendation
	    career_scores = []
	    for career in CAREER_RECOMMENDATIONS:
	        # Skills are compared term-by-term; the other sections are compared
	        # against one embedding of the career's whole skill list.
	        career_skill_embeddings = embedding_model.encode(career['skills'])
	        career_profile_embedding = embedding_model.encode([", ".join(career['skills'])])

	        skills_similarity = _max_similarity(career_skill_embeddings, cv_skill_embeddings)
	        hobby_similarity = _max_similarity(career_profile_embedding, cv_hobby_embeddings)
	        qualification_similarity = _max_similarity(career_profile_embedding, cv_qualifications_embeddings)
	        experience_similarity = _max_similarity(career_profile_embedding, cv_experience_embeddings)

	        # Calculate weighted sum of similarities
	        total_similarity = (
	            (0.5 * skills_similarity)
	            + (0.1 * hobby_similarity)
	            + (0.2 * qualification_similarity)
	            + (0.2 * experience_similarity)
	        )
	        career_scores.append({
	            'title': career['title'],
	            'description': career['description'],
	            'score': total_similarity,
	            'matched_skills': all_skills,
	            'matched_hobbies': all_hobbies,
	            'matched_qualifications': all_qualifications,
	            'matched_experience': all_experience
	        })

	    # Sort careers by similarity score
	    ranked_careers = sorted(career_scores, key=lambda x: x['score'], reverse=True)

	    # Prepare recommendation report
	    report = "### Career Recommendation Analysis\n\n"
	    report += "Top Career Recommendations:\n"
	    for career in ranked_careers[:5]:  # Display top 5 recommendations
	        report += f"- {career['title']}\n"
	        report += f" {career['description']}\n"
	        report += f" Similarity Score: {career['score']:.2f}\n"

	    report += "\nSkills Match:\n"
	    report += "- Identified Skills: " + ", ".join(ranked_careers[0]['matched_skills']) + "\n\n"

	    report += "Hobbies Match:\n"
	    report += "- Identified Hobbies: " + ", ".join(ranked_careers[0]['matched_hobbies']) + "\n\n"

	    report += "Qualification Match:\n"
	    report += "- Identified Qualifications: " + ", ".join(ranked_careers[0]['matched_qualifications']) + "\n\n"

	    report += "Experience Match:\n"
	    report += "- Identified Experience: " + ", ".join(ranked_careers[0]['matched_experience']) + "\n\n"

	    return report

	def cv_skill_assessment(cv_file):
	    """
	    Main function to process uploaded CV and provide skill assessment

	    The file is first read as a PDF; if that yields no text it is read as
	    UTF-8 plain text instead. Any failure is reported as a message string
	    rather than raised, so the UI always has something to display.

	    Args:
	        cv_file (str): Path to uploaded CV file; may be None/empty when the
	            form is submitted without a file (assumed UI behaviour).

	    Returns:
	        str: Skill assessment and career recommendations, or a message
	        starting with "Error processing CV: " on failure.
	    """
	    # Nothing uploaded: answer clearly instead of failing inside open().
	    if not cv_file:
	        return "Error processing CV: no file was uploaded"

	    try:
	        # Extract text from PDF
	        cv_text = extract_text_from_pdf(cv_file)

	        # If PDF extraction fails, try direct text processing
	        if not cv_text.strip():
	            with open(cv_file, 'r', encoding='utf-8') as f:
	                cv_text = f.read()

	        # Analyze CV and get recommendations
	        return analyze_cv_skills(cv_text)

	    except Exception as e:
	        return f"Error processing CV: {str(e)}"

	# Gradio front end
	def launch_cv_skill_assessment_app():
	    """
	    Build the CV Skill Assessment Gradio interface and start serving it.

	    The interface takes one uploaded file (passed to cv_skill_assessment as
	    a file path) and renders the returned report as Markdown.
	    """
	    intro_text = """
	Discover your ideal career path based on your CV!

	How to use:
	1. Upload your CV (PDF or Text file)
	2. Our AI analyzes your skills, experience, and background
	3. Receive personalized career recommendations

	Features:
	- Advanced CV parsing
	- Skill extraction
	- Domain-based career matching
	- Detailed recommendation report
	"""
	    upload_box = gr.File(label="Upload Your CV (PDF/Text)", type="filepath")
	    report_view = gr.Markdown(label="Career Recommendation Report")

	    app = gr.Interface(
	        fn=cv_skill_assessment,
	        inputs=upload_box,
	        outputs=report_view,
	        title="🚀 CV Skills Assessment AI",
	        description=intro_text,
	        theme="huggingface",
	    )
	    app.launch(debug=True)

	# Run the application
	# Called unconditionally at module level, so importing this file also
	# builds and launches the Gradio app.
	launch_cv_skill_assessment_app()