"""CV skills assessment app.

Extracts text from an uploaded CV, splits it into sections with regular
expressions, collects skill/experience/qualification/hobby terms with spaCy
and keyword lists, and ranks a fixed catalogue of careers by embedding
similarity. The result is rendered as a Markdown report in a Gradio UI.
"""

import re

import gradio as gr
import nltk
import numpy as np
import pandas as pd
import PyPDF2
import spacy
import torch
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

# Download necessary NLTK resources
nltk.download('punkt')

# Load spaCy and Sentence Transformer models
nlp = spacy.load('en_core_web_sm')
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Check for GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on: {device}")

# Updated career database
CAREER_RECOMMENDATIONS = [
    {
        "title": "Software Engineer",
        "description": "Develops software applications and systems",
        "skills": [
            "Python", "Java", "C++", "JavaScript", "Software Development",
            "Database Management", "Web Development", "Cloud Computing",
            "Data Structures", "Algorithms",
        ],
    },
    {
        "title": "Data Scientist",
        "description": "Analyzes complex data to help make business decisions",
        "skills": [
            "Python", "R", "Statistics", "Machine Learning",
            "Data Visualization", "Data Analysis", "SQL",
        ],
    },
    {
        "title": "Cloud Solutions Architect",
        "description": "Designs and manages cloud computing strategies",
        "skills": [
            "Cloud Computing", "AWS", "Azure", "GCP",
            "Infrastructure as Code", "Networking",
        ],
    },
    {
        "title": "AI/ML Engineer",
        "description": "Creates intelligent systems and machine learning models",
        "skills": [
            "Machine Learning", "Deep Learning", "Neural Networks",
            "TensorFlow", "PyTorch", "Computer Vision",
            "Natural Language Processing",
        ],
    },
    {
        "title": "Database Administrator",
        "description": "Manage databases, ensure data security",
        "skills": [
            "SQL", "Database Management", "Database Security",
            "Database Design", "Database Modeling",
        ],
    },
    {
        "title": "Mechanical Engineer",
        "description": "Designs, develops, and tests mechanical devices and systems",
        "skills": [
            "CAD", "CAM", "Matlab", "Mechanical Design",
            "Manufacturing Engineering", "Quality Control",
            "Thermal Engineering", "Fluid Mechanics", "GD&T",
            "Engineering Drawings", "Blueprint reading", "Product Design",
            "FEA Analysis",
        ],
    },
    {
        "title": "Manufacturing Engineer",
        "description": "Optimizes manufacturing processes for efficiency and quality",
        "skills": [
            "Manufacturing Engineering", "Process Optimization",
            "Lean Manufacturing", "Six Sigma", "Production Planning",
            "Supply Chain Management",
        ],
    },
    {
        "title": "Quality Engineer",
        "description": "Oversees quality assurance activities and ensures products meet standards.",
        "skills": [
            "Quality Control", "Quality Assurance", "ISO Standards",
            "Statistical Process Control", "Inspection", "Testing",
        ],
    },
    {
        "title": "Design Engineer",
        "description": "Creates product designs and technical drawings using CAD software",
        "skills": [
            "CAD", "CAM", "Product Design", "3D Modeling",
            "Engineering Design", "Drafting",
        ],
    },
    {
        "title": "Business Analyst",
        "description": "Identifies business needs and determines solutions",
        "skills": [
            "Business Analysis", "Requirements Gathering", "Data Analysis",
            "Process Improvement", "Project Management",
        ],
    },
    {
        "title": "Marketing Manager",
        "description": "Develops and implements marketing strategies",
        "skills": [
            "Marketing", "Digital Marketing", "Social Media Marketing",
            "Market Research", "Branding", "Advertising",
            "Content Marketing",
        ],
    },
    {
        "title": "Project Manager",
        "description": "Leads and coordinates project teams and resources",
        "skills": [
            "Project Management", "Project Planning", "Risk Management",
            "Team Management", "Agile Methodologies",
        ],
    },
    {
        "title": "Management Consultant",
        "description": "Advises organizations on improving performance",
        "skills": [
            "Consulting", "Strategy", "Problem Solving",
            "Business Analysis", "Communication",
        ],
    },
    {
        "title": "Graphic Designer",
        "description": "Creates visual concepts using computer software or by hand",
        "skills": [
            "Graphic Design", "Adobe Photoshop", "Adobe Illustrator",
            "UI/UX Design", "Visual Communication", "Branding",
        ],
    },
    {
        "title": "Content Strategist",
        "description": "Develops content plans and marketing strategies",
        "skills": [
            "Content Writing", "Content Strategy", "SEO",
            "Content Marketing", "Copywriting",
        ],
    },
    {
        "title": "UI/UX Designer",
        "description": "Designs user interfaces for digital products",
        "skills": [
            "UI Design", "UX Design", "Wireframing", "Prototyping",
            "User Research", "Interaction Design",
        ],
    },
    {
        "title": "Digital Marketing Specialist",
        "description": "Promotes brands and products through digital channels",
        "skills": [
            "Digital Marketing", "Social Media Marketing", "SEO",
            "PPC Advertising", "Email Marketing", "Content Marketing",
        ],
    },
    {
        "title": "Healthcare Administrator",
        "description": "Manages healthcare facilities and services",
        "skills": [
            "Healthcare Administration", "Healthcare Management",
            "Healthcare Policy", "Healthcare Finance", "Patient Care",
        ],
    },
    {
        "title": "Medical Researcher",
        "description": "Conducts research to improve medical knowledge",
        "skills": [
            "Medical Research", "Data Analysis", "Research Design",
            "Laboratory Techniques", "Scientific Writing",
        ],
    },
    {
        "title": "Healthcare Consultant",
        "description": "Advises healthcare organizations on improvement strategies",
        "skills": [
            "Healthcare Consulting", "Healthcare Strategy",
            "Healthcare Operations", "Healthcare Policy",
        ],
    },
    {
        "title": "Medical Assistant",
        "description": "Assists with patient care and medical administrative tasks.",
        "skills": [
            "Patient Care", "Medical Terminology", "Medical Assisting",
            "Clinical Procedures", "Vital Signs",
            "Electronic Health Records",
        ],
    },
]

# Keyword lists matched case-insensitively against the relevant CV section.
# NOTE: matching is plain substring containment, so short keywords can match
# inside longer words (e.g. "Java" inside "JavaScript").
SKILL_KEYWORDS = [
    "AutoCAD", "Manufacturing Engineering", "Quality Control",
    "Thermal Engineering", "Heat Transfer", "Machine Design",
    "Fluid Mechanics", "CAD", "CAM", "Matlab", "GD&T",
    "Engineering Drawings", "Blueprint reading", "Product Design",
    "FEA Analysis", "Project Management", "Marketing", "Business Analysis",
    "Sales", "Finance", "Consulting", "Market Research", "Graphic Design",
    "Content Writing", "Digital Marketing", "UI/UX Design",
    "Video Production", "SEO", "Social Media Marketing", "Patient Care",
    "Medical Research", "Healthcare Administration", "Medical Technology",
    "Anatomy", "Physiology", "Pharmacology", "Python", "Java",
    "Machine Learning", "Data Science", "Cloud Computing", "Cybersecurity",
    "Web Development", "Software Development", "Database Management", "SQL",
    "C++", "JavaScript", "AWS", "Azure", "GCP", "Infrastructure as Code",
    "Networking", "Deep Learning", "Neural Networks", "TensorFlow",
    "PyTorch", "Computer Vision", "Natural Language Processing", "R",
    "Statistics", "Data Visualization", "Data Analysis",
    "Agile Methodologies", "Adobe Photoshop", "Adobe Illustrator",
    "Visual Communication", "Branding", "Copywriting", "Wireframing",
    "Prototyping", "User Research", "Interaction Design", "PPC Advertising",
    "Email Marketing", "Healthcare Management", "Healthcare Policy",
    "Healthcare Finance", "Medical Terminology", "Clinical Procedures",
    "Vital Signs", "Electronic Health Records", "Lean Manufacturing",
    "Six Sigma", "Production Planning", "Supply Chain Management",
    "ISO Standards", "Statistical Process Control", "Inspection", "Testing",
    "Requirements Gathering", "Process Improvement",
]
EXPERIENCE_KEYWORDS = [
    "blueprints", "specifications", "production", "inspection", "testing",
    "measurement", "calipers", "gauges", "micrometers", "quality standards",
    "production process", "finished items", "inspection results",
    "test data", "training", "design", "development", "analysis",
    "management", "research", "consulting",
]
PROJECT_KEYWORDS = [
    "helicopter", "assembly", "dismantling", "5S methodology", "flow path",
    "material", "productivity", "layout",
]
TRAINING_KEYWORDS = ["inplant training"]
QUALIFICATION_KEYWORDS = [
    "engineering", "diploma", "bachelor", "master", "degree",
    "computer science", "information technology", "business administration",
    "medical", "healthcare",
]

# Section name -> (header alternatives, stop words that belong to the section
# itself and therefore must not terminate it).
_SECTION_HEADERS = {
    'education': (('education', 'qualification', 'academic'), ('education',)),
    'experience': (('experience', 'work'), ('experience', 'work')),
    'skills': (('skills', 'expertise', 'technical skills'), ('skills',)),
    'projects': (('projects',), ('projects',)),
    'training': (('training', 'certification'), ('training',)),
    'hobbies': (('hobbies', 'interests'), ('hobbies',)),
    'personal': (('personal details',), ('personal',)),
}

# Words that, at the start of a line, mark the beginning of another section.
_SECTION_STOP_WORDS = (
    'education', 'work', 'experience', 'skills', 'projects', 'training',
    'hobbies', 'personal', 'declaration',
)

_CONTACT_PATTERN = re.compile(r'(email|phone|contact)[:\s]*([^\n]+)')


def _build_section_pattern(headers, own_stop_words):
    """Compile the regex that captures one CV section's body.

    The body runs lazily from the header up to the first blank line, the
    first line starting with another section's stop word, or the end of the
    text (so a final section without a trailing blank line is still found).
    """
    stops = "|".join(w for w in _SECTION_STOP_WORDS if w not in own_stop_words)
    return re.compile(
        r'(' + "|".join(headers) + r')[:\s]*(.*?)(?=\n\n|\n(?:' + stops + r')|\Z)',
        re.DOTALL | re.IGNORECASE,
    )


_SECTION_PATTERNS = {
    name: _build_section_pattern(headers, own_stops)
    for name, (headers, own_stops) in _SECTION_HEADERS.items()
}

# Relative contribution of each CV facet to a career's total score.
_SKILLS_WEIGHT = 0.5
_HOBBY_WEIGHT = 0.1
_QUALIFICATION_WEIGHT = 0.2
_EXPERIENCE_WEIGHT = 0.2

_APP_DESCRIPTION = """
Discover your ideal career path based on your CV!

*How to use*:
1. Upload your CV (PDF or Text file)
2. Our AI analyzes your skills, experience, and background
3. Receive personalized career recommendations

*Features*:
- Advanced CV parsing
- Skill extraction
- Domain-based career matching
- Detailed recommendation report
"""


def extract_text_from_pdf(file_path):
    """
    Extract text from PDF file

    Args:
        file_path (str): Path to the PDF file

    Returns:
        str: Extracted text from the PDF, one newline appended per page.
            An empty string is returned if the file cannot be read as a PDF.
    """
    try:
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # extract_text() may yield None/empty for image-only pages; treat
            # that as an empty page instead of failing the whole document.
            return ''.join(
                (page.extract_text() or '') + '\n' for page in reader.pages
            )
    except Exception as e:
        # Best effort by design: the caller falls back to plain-text reading.
        print(f"Error extracting PDF text: {e}")
        return ""


def preprocess_cv_text(text):
    """
    Preprocess CV text for analysis

    Args:
        text (str): Raw CV text

    Returns:
        dict: Maps each section name that was found ('contact', 'education',
            'experience', 'skills', 'projects', 'training', 'hobbies',
            'personal') to the concatenated, lower-cased text of its matches.
            Sections without a match are omitted.
    """
    # Normalize text
    text = text.lower()

    sections = {'contact': _CONTACT_PATTERN.findall(text)}
    for name, pattern in _SECTION_PATTERNS.items():
        sections[name] = pattern.findall(text)

    # Each match is a (header, body) tuple; combine all bodies into one string.
    return {
        name: " ".join(body.strip() for _, body in matches)
        for name, matches in sections.items()
        if matches
    }


def _tokens_with_pos(doc, pos_tags):
    """Return the text of every token in *doc* whose coarse POS is in *pos_tags*."""
    return [token.text for token in doc if token.pos_ in pos_tags]


def _keywords_in_text(keywords, text):
    """Return the keywords that occur (case-insensitive substring) in *text*."""
    lowered = text.lower()
    return [keyword for keyword in keywords if keyword.lower() in lowered]


def _normalize_terms(terms):
    """Lower-case, de-duplicate and sort terms, dropping those of length <= 2."""
    return sorted({term.lower() for term in terms if len(term) > 2})


def _extract_cv_terms(cv_info):
    """Collect skill, hobby, qualification and experience terms from CV sections.

    Args:
        cv_info (dict): Output of ``preprocess_cv_text``.

    Returns:
        dict: Keys 'skills', 'hobbies', 'qualifications' and 'experience',
            each mapping to a sorted list of unique lower-cased terms.
    """
    skills, hobbies, qualifications, experience = [], [], [], []

    # Skill extraction
    if 'skills' in cv_info:
        skill_text = cv_info['skills']
        doc = nlp(skill_text)
        skills.extend(
            ent.text for ent in doc.ents
            if ent.label_ in ['SKILL', 'ORG', 'PRODUCT']
        )
        skills.extend(_tokens_with_pos(doc, ['NOUN', 'ADJ']))
        skills.extend(_keywords_in_text(SKILL_KEYWORDS, skill_text))

    # Experience, projects and training all feed the experience term list.
    for section, keywords in (
        ('experience', EXPERIENCE_KEYWORDS),
        ('projects', PROJECT_KEYWORDS),
        ('training', TRAINING_KEYWORDS),
    ):
        if section in cv_info:
            section_text = cv_info[section]
            experience.extend(_tokens_with_pos(nlp(section_text), ['NOUN', 'VERB']))
            experience.extend(_keywords_in_text(keywords, section_text))

    # Hobby extraction
    if 'hobbies' in cv_info:
        hobbies.extend(
            _tokens_with_pos(nlp(cv_info['hobbies']), ['NOUN', 'VERB', 'ADJ'])
        )

    # Qualification extraction
    if 'education' in cv_info:
        education_text = cv_info['education']
        qualifications.extend(_tokens_with_pos(nlp(education_text), ['NOUN', 'ADJ']))
        qualifications.extend(_keywords_in_text(QUALIFICATION_KEYWORDS, education_text))

    return {
        'skills': _normalize_terms(skills),
        'hobbies': _normalize_terms(hobbies),
        'qualifications': _normalize_terms(qualifications),
        'experience': _normalize_terms(experience),
    }


def _max_similarity(reference_embeddings, cv_embeddings):
    """Return the highest pairwise cosine similarity, or 0.0 without CV terms."""
    if cv_embeddings is None or len(cv_embeddings) == 0:
        return 0.0
    return float(np.max(cosine_similarity(reference_embeddings, cv_embeddings)))


def _score_careers(cv_terms):
    """Score every career in ``CAREER_RECOMMENDATIONS`` against the CV terms.

    Returns:
        list: One dict per career with 'title', 'description' and 'score',
            sorted by descending score (ties keep catalogue order).
    """
    # The CV embeddings do not depend on the career, so encode them once.
    # Empty term lists are never passed to the model.
    cv_embeddings = {
        facet: embedding_model.encode(terms) if terms else None
        for facet, terms in cv_terms.items()
    }
    needs_profile = any(
        cv_embeddings[facet] is not None
        for facet in ('hobbies', 'qualifications', 'experience')
    )

    career_scores = []
    for career in CAREER_RECOMMENDATIONS:
        # Skills are compared term-by-term and the best pair wins.
        skills_similarity = _max_similarity(
            embedding_model.encode(career['skills']), cv_embeddings['skills']
        )

        # The other facets are compared against the career's skills joined
        # into a single string.
        hobby_similarity = qualification_similarity = experience_similarity = 0.0
        if needs_profile:
            profile_embedding = embedding_model.encode([", ".join(career['skills'])])
            hobby_similarity = _max_similarity(
                profile_embedding, cv_embeddings['hobbies']
            )
            qualification_similarity = _max_similarity(
                profile_embedding, cv_embeddings['qualifications']
            )
            experience_similarity = _max_similarity(
                profile_embedding, cv_embeddings['experience']
            )

        # Calculate weighted sum of similarities
        total_similarity = (
            (_SKILLS_WEIGHT * skills_similarity)
            + (_HOBBY_WEIGHT * hobby_similarity)
            + (_QUALIFICATION_WEIGHT * qualification_similarity)
            + (_EXPERIENCE_WEIGHT * experience_similarity)
        )
        career_scores.append({
            'title': career['title'],
            'description': career['description'],
            'score': total_similarity,
        })

    return sorted(career_scores, key=lambda entry: entry['score'], reverse=True)


def _format_report(ranked_careers, cv_terms):
    """Render the top five careers and the identified CV terms as Markdown."""
    parts = [
        "### Career Recommendation Analysis\n\n",
        "**Top Career Recommendations**:\n",
    ]
    for career in ranked_careers[:5]:  # Display top 5 recommendations
        parts.append(f"- **{career['title']}**\n")
        parts.append(f" *{career['description']}*\n")
        parts.append(f" *Similarity Score: {career['score']:.2f}*\n")

    parts.append("\n**Skills Match**:\n")
    parts.append("- Identified Skills: " + ", ".join(cv_terms['skills']) + "\n\n")
    parts.append("**Hobbies Match**:\n")
    parts.append("- Identified Hobbies: " + ", ".join(cv_terms['hobbies']) + "\n\n")
    parts.append("**Qualification Match**:\n")
    parts.append(
        "- Identified Qualifications: "
        + ", ".join(cv_terms['qualifications']) + "\n\n"
    )
    parts.append("**Experience Match**:\n")
    parts.append(
        "- Identified Experience: " + ", ".join(cv_terms['experience']) + "\n\n"
    )
    return "".join(parts)


def analyze_cv_skills(cv_text):
    """
    Analyze skills from CV and recommend career paths based on combined scores.

    Args:
        cv_text (str): Raw CV text (section splitting is done here)

    Returns:
        str: Markdown report listing the five best-scoring careers and the
            skills, hobbies, qualifications and experience terms identified.
    """
    cv_info = preprocess_cv_text(cv_text)
    cv_terms = _extract_cv_terms(cv_info)
    ranked_careers = _score_careers(cv_terms)
    return _format_report(ranked_careers, cv_terms)


def cv_skill_assessment(cv_file):
    """
    Main function to process uploaded CV and provide skill assessment

    Args:
        cv_file (str): Path to uploaded CV file

    Returns:
        str: Skill assessment and career recommendations, or a message
            starting with "Error processing CV:" if the file is missing,
            unreadable, or processing fails.
    """
    if not cv_file:
        # Nothing was uploaded; avoid surfacing an opaque TypeError message.
        return "Error processing CV: no file was uploaded."

    try:
        # Extract text from PDF
        cv_text = extract_text_from_pdf(cv_file)

        # If PDF extraction fails, try direct text processing
        if not cv_text.strip():
            with open(cv_file, 'r', encoding='utf-8') as f:
                cv_text = f.read()

        if not cv_text.strip():
            return "Error processing CV: no readable text found in the uploaded file."

        # Analyze CV and get recommendations
        return analyze_cv_skills(cv_text)
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the app.
        return f"Error processing CV: {str(e)}"


# Create Gradio Interface
def launch_cv_skill_assessment_app():
    """
    Launch the CV Skill Assessment AI Gradio Interface
    """
    demo = gr.Interface(
        fn=cv_skill_assessment,
        inputs=gr.File(label="Upload Your CV (PDF/Text)", type="filepath"),
        outputs=gr.Markdown(label="Career Recommendation Report"),
        title="🚀 CV Skills Assessment AI",
        description=_APP_DESCRIPTION,
        theme="huggingface",
    )
    demo.launch(debug=True)


# Run the application
launch_cv_skill_assessment_app()