Spaces:

Anupam251272
/

Cv-Skill-Assessment-Ai

Build error

App Files Files Community

Anupam251272 committed on Dec 17, 2024

Commit

bac66fb

verified ·

1 Parent(s): 4e4c085

Create app.py

Browse files

Files changed (1) hide show

app.py +310 -0

app.py ADDED Viewed

	@@ -0,0 +1,310 @@

+import gradio as gr
+import torch
+import spacy
+import nltk
+import re
+import PyPDF2
+import numpy as np
+import pandas as pd
+from transformers import pipeline
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+# Module-level setup: every statement below runs once, at import time.
+# Download the NLTK 'punkt' resource.
+nltk.download('punkt')
+# Load the spaCy pipeline (used below for entity and part-of-speech tagging)
+# and the Sentence Transformer model (used below to embed skill/keyword strings).
+nlp = spacy.load('en_core_web_sm')
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+# Report whether CUDA is available.
+# NOTE(review): in the code visible here `device` is only printed; it is not
+# passed to either model.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Running on: {device}")
+# Career catalogue that a CV is scored against. Every entry has the same three
+# keys: "title", a one-line "description", and the list of "skills" whose
+# embeddings are compared with the terms extracted from the CV.
+CAREER_RECOMMENDATIONS = [
+    {
+        "title": "Software Engineer",
+        "description": "Develops software applications and systems",
+        "skills": [
+            "Python", "Java", "C++", "JavaScript", "Software Development",
+            "Database Management", "Web Development", "Cloud Computing",
+            "Data Structures", "Algorithms",
+        ],
+    },
+    {
+        "title": "Data Scientist",
+        "description": "Analyzes complex data to help make business decisions",
+        "skills": [
+            "Python", "R", "Statistics", "Machine Learning",
+            "Data Visualization", "Data Analysis", "SQL",
+        ],
+    },
+    {
+        "title": "Cloud Solutions Architect",
+        "description": "Designs and manages cloud computing strategies",
+        "skills": [
+            "Cloud Computing", "AWS", "Azure", "GCP",
+            "Infrastructure as Code", "Networking",
+        ],
+    },
+    {
+        "title": "AI/ML Engineer",
+        "description": "Creates intelligent systems and machine learning models",
+        "skills": [
+            "Machine Learning", "Deep Learning", "Neural Networks",
+            "TensorFlow", "PyTorch", "Computer Vision",
+            "Natural Language Processing",
+        ],
+    },
+    {
+        "title": "Database Administrator",
+        "description": "Manage databases, ensure data security",
+        "skills": [
+            "SQL", "Database Management", "Database Security",
+            "Database Design", "Database Modeling",
+        ],
+    },
+    {
+        "title": "Mechanical Engineer",
+        "description": "Designs, develops, and tests mechanical devices and systems",
+        "skills": [
+            "CAD", "CAM", "Matlab", "Mechanical Design",
+            "Manufacturing Engineering", "Quality Control",
+            "Thermal Engineering", "Fluid Mechanics", "GD&T",
+            "Engineering Drawings", "Blueprint reading", "Product Design",
+            "FEA Analysis",
+        ],
+    },
+    {
+        "title": "Manufacturing Engineer",
+        "description": "Optimizes manufacturing processes for efficiency and quality",
+        "skills": [
+            "Manufacturing Engineering", "Process Optimization",
+            "Lean Manufacturing", "Six Sigma", "Production Planning",
+            "Supply Chain Management",
+        ],
+    },
+    {
+        "title": "Quality Engineer",
+        "description": "Oversees quality assurance activities and ensures products meet standards.",
+        "skills": [
+            "Quality Control", "Quality Assurance", "ISO Standards",
+            "Statistical Process Control", "Inspection", "Testing",
+        ],
+    },
+    {
+        "title": "Design Engineer",
+        "description": "Creates product designs and technical drawings using CAD software",
+        "skills": [
+            "CAD", "CAM", "Product Design", "3D Modeling",
+            "Engineering Design", "Drafting",
+        ],
+    },
+    {
+        "title": "Business Analyst",
+        "description": "Identifies business needs and determines solutions",
+        "skills": [
+            "Business Analysis", "Requirements Gathering", "Data Analysis",
+            "Process Improvement", "Project Management",
+        ],
+    },
+    {
+        "title": "Marketing Manager",
+        "description": "Develops and implements marketing strategies",
+        "skills": [
+            "Marketing", "Digital Marketing", "Social Media Marketing",
+            "Market Research", "Branding", "Advertising", "Content Marketing",
+        ],
+    },
+    {
+        "title": "Project Manager",
+        "description": "Leads and coordinates project teams and resources",
+        "skills": [
+            "Project Management", "Project Planning", "Risk Management",
+            "Team Management", "Agile Methodologies",
+        ],
+    },
+    {
+        "title": "Management Consultant",
+        "description": "Advises organizations on improving performance",
+        "skills": [
+            "Consulting", "Strategy", "Problem Solving", "Business Analysis",
+            "Communication",
+        ],
+    },
+    {
+        "title": "Graphic Designer",
+        "description": "Creates visual concepts using computer software or by hand",
+        "skills": [
+            "Graphic Design", "Adobe Photoshop", "Adobe Illustrator",
+            "UI/UX Design", "Visual Communication", "Branding",
+        ],
+    },
+    {
+        "title": "Content Strategist",
+        "description": "Develops content plans and marketing strategies",
+        "skills": [
+            "Content Writing", "Content Strategy", "SEO", "Content Marketing",
+            "Copywriting",
+        ],
+    },
+    {
+        "title": "UI/UX Designer",
+        "description": "Designs user interfaces for digital products",
+        "skills": [
+            "UI Design", "UX Design", "Wireframing", "Prototyping",
+            "User Research", "Interaction Design",
+        ],
+    },
+    {
+        "title": "Digital Marketing Specialist",
+        "description": "Promotes brands and products through digital channels",
+        "skills": [
+            "Digital Marketing", "Social Media Marketing", "SEO",
+            "PPC Advertising", "Email Marketing", "Content Marketing",
+        ],
+    },
+    {
+        "title": "Healthcare Administrator",
+        "description": "Manages healthcare facilities and services",
+        "skills": [
+            "Healthcare Administration", "Healthcare Management",
+            "Healthcare Policy", "Healthcare Finance", "Patient Care",
+        ],
+    },
+    {
+        "title": "Medical Researcher",
+        "description": "Conducts research to improve medical knowledge",
+        "skills": [
+            "Medical Research", "Data Analysis", "Research Design",
+            "Laboratory Techniques", "Scientific Writing",
+        ],
+    },
+    {
+        "title": "Healthcare Consultant",
+        "description": "Advises healthcare organizations on improvement strategies",
+        "skills": [
+            "Healthcare Consulting", "Healthcare Strategy",
+            "Healthcare Operations", "Healthcare Policy",
+        ],
+    },
+    {
+        "title": "Medical Assistant",
+        "description": "Assists with patient care and medical administrative tasks.",
+        "skills": [
+            "Patient Care", "Medical Terminology", "Medical Assisting",
+            "Clinical Procedures", "Vital Signs", "Electronic Health Records",
+        ],
+    },
+]
+def extract_text_from_pdf(file_path):
+    """
+    Read a PDF and return the text of all its pages
+    Args:
+        file_path (str): Path to the PDF file
+    Returns:
+        str: Text of every page in page order, each followed by a newline;
+            an empty string if the file cannot be opened or parsed
+    """
+    try:
+        with open(file_path, 'rb') as pdf_handle:
+            pdf_reader = PyPDF2.PdfReader(pdf_handle)
+            # One newline-terminated chunk per page, glued together at the end.
+            page_chunks = [pdf_page.extract_text() + '\n' for pdf_page in pdf_reader.pages]
+            return ''.join(page_chunks)
+    except Exception as exc:
+        # Best effort: report the problem and hand back "no text" so the
+        # caller can decide what to do next.
+        print(f"Error extracting PDF text: {exc}")
+        return ""
+def preprocess_cv_text(text):
+    """
+    Split CV text into named sections
+    The text is lower-cased, then each section is located by one of its header
+    keywords and captured up to the next blank line, the next line that starts
+    with another section header, or the end of the text.
+    Args:
+        text (str): Raw CV text
+    Returns:
+        dict: Maps each section that was found ('contact', 'education',
+            'experience', 'skills', 'projects', 'training', 'hobbies',
+            'personal') to the stripped body text of all its matches joined
+            with single spaces. Sections without a match are omitted.
+    """
+    # Normalize text
+    text = text.lower()
+    # (section name, header alternatives, line-leading headers that end the section)
+    section_rules = [
+        ('education', 'education|qualification|academic', 'work|experience|skills|projects|training|hobbies|personal|declaration'),
+        ('experience', 'experience|work', 'education|skills|projects|training|hobbies|personal|declaration'),
+        ('skills', 'skills|expertise|technical skills', 'education|work|projects|training|hobbies|personal|declaration'),
+        ('projects', 'projects', 'education|work|skills|training|hobbies|personal|declaration'),
+        ('training', 'training|certification', 'education|work|skills|projects|hobbies|personal|declaration'),
+        ('hobbies', 'hobbies|interests', 'education|work|skills|projects|training|personal|declaration'),
+        ('personal', 'personal details', 'education|work|skills|projects|training|hobbies|declaration'),
+    ]
+    # Contact details are captured to the end of their line, so they keep their own pattern.
+    sections = {'contact': re.findall(r'(email|phone|contact)[:\s]*([^\n]+)', text)}
+    for name, headers, stop_headers in section_rules:
+        # \Z lets the body also end at the end of the text. Without it, a
+        # section that is the last thing in the CV (no blank line or further
+        # header after it) never matched and was silently dropped.
+        pattern = r'(' + headers + r')[:\s]*(.*?)(?=\n\n|\n(?:' + stop_headers + r')|\Z)'
+        sections[name] = re.findall(pattern, text, re.DOTALL | re.IGNORECASE)
+    # Keep only the sections that matched; group 1 of each match is the header
+    # keyword, group 2 the body.
+    processed_sections = {}
+    for key, matches in sections.items():
+        if matches:
+            processed_sections[key] = " ".join(match[1].strip() for match in matches)
+    return processed_sections
+def _keyword_hits(text, keywords):
+    """Return the entries of *keywords* that occur in *text*, compared case-insensitively."""
+    lowered = text.lower()
+    return [keyword for keyword in keywords if keyword.lower() in lowered]
+def _tokens_with_pos(doc, pos_tags):
+    """Return the text of each token of the spaCy *doc* whose coarse POS tag is in *pos_tags*."""
+    return [token.text for token in doc if token.pos_ in pos_tags]
+def _unique_terms(terms):
+    """Lower-case *terms*, drop those of two characters or fewer, de-duplicate and sort."""
+    return sorted({term.lower() for term in terms if len(term) > 2})
+def _encode_terms(terms):
+    """Embed *terms*, or return None for an empty list so the model is not called at all."""
+    return embedding_model.encode(terms) if terms else None
+def _best_similarity(reference_embeddings, cv_embeddings):
+    """Return the highest pairwise cosine similarity, or 0 when there are no CV embeddings."""
+    if cv_embeddings is None:
+        return 0
+    return np.max(cosine_similarity(reference_embeddings, cv_embeddings))
+def analyze_cv_skills(cv_text):
+    """
+    Analyze a CV and recommend career paths based on combined similarity scores.
+    Terms are extracted from the CV's skills, experience/projects/training,
+    hobbies and education sections (spaCy tokens plus fixed keyword lists),
+    embedded, and compared with the skills of every entry in
+    CAREER_RECOMMENDATIONS. The per-career score is
+    0.5*skills + 0.1*hobbies + 0.2*qualifications + 0.2*experience, where each
+    component is the maximum cosine similarity found (0 if the CV has no terms
+    of that kind).
+    Args:
+        cv_text (str): Raw CV text; it is sectioned with preprocess_cv_text
+    Returns:
+        str: Markdown report listing the five best-scoring careers and the
+            terms identified in the CV
+    """
+    # Preprocess CV
+    cv_info = preprocess_cv_text(cv_text)
+    all_skills = []
+    all_hobbies = []
+    all_qualifications = []
+    all_experience = []
+    # Skill extraction: named entities, nouns/adjectives, and known skill keywords
+    if 'skills' in cv_info:
+        skill_text = cv_info['skills']
+        doc = nlp(skill_text)
+        # NOTE(review): 'SKILL' is kept from the original label list; confirm the
+        # loaded spaCy pipeline actually emits it.
+        all_skills.extend(ent.text for ent in doc.ents if ent.label_ in ['SKILL', 'ORG', 'PRODUCT'])
+        all_skills.extend(_tokens_with_pos(doc, ['NOUN', 'ADJ']))
+        skill_keywords = ["AutoCAD", "Manufacturing Engineering", "Quality Control", "Thermal Engineering", "Heat Transfer", "Machine Design", "Fluid Mechanics", "CAD", "CAM", "Matlab", "GD&T", "Engineering Drawings", "Blueprint reading", "Product Design", "FEA Analysis",
+                          "Project Management", "Marketing", "Business Analysis", "Sales", "Finance", "Consulting", "Market Research",
+                          "Graphic Design", "Content Writing", "Digital Marketing", "UI/UX Design", "Video Production", "SEO", "Social Media Marketing",
+                          "Patient Care", "Medical Research", "Healthcare Administration", "Medical Technology", "Anatomy", "Physiology", "Pharmacology", "Python", "Java", "Machine Learning", "Data Science", "Cloud Computing", "Cybersecurity", "Web Development", "Software Development", "Database Management",
+                          "SQL", "C++", "JavaScript", "AWS", "Azure", "GCP", "Infrastructure as Code", "Networking", "Deep Learning", "Neural Networks", "TensorFlow", "PyTorch", "Computer Vision", "Natural Language Processing", "R", "Statistics", "Data Visualization", "Data Analysis", "Agile Methodologies",
+                          "Adobe Photoshop", "Adobe Illustrator", "Visual Communication", "Branding", "Copywriting", "Wireframing", "Prototyping", "User Research", "Interaction Design", "PPC Advertising", "Email Marketing", "Healthcare Management", "Healthcare Policy", "Healthcare Finance",
+                          "Medical Terminology", "Clinical Procedures", "Vital Signs", "Electronic Health Records", "Lean Manufacturing", "Six Sigma", "Production Planning", "Supply Chain Management", "ISO Standards", "Statistical Process Control", "Inspection", "Testing",
+                          "Requirements Gathering", "Process Improvement"]
+        all_skills.extend(_keyword_hits(skill_text, skill_keywords))
+    # Experience, projects and training all feed the "experience" terms:
+    # nouns/verbs from the section plus that section's keyword list.
+    experience_sources = {
+        'experience': ["blueprints", "specifications", "production", "inspection", "testing", "measurement", "calipers",
+                       "gauges", "micrometers", "quality standards", "production process", "finished items", "inspection results", "test data", "training", "design", "development", "analysis", "management",
+                       "research", "consulting"],
+        'projects': ["helicopter", "assembly", "dismantling", "5S methodology", "flow path", "material", "productivity", "layout"],
+        'training': ["inplant training"],
+    }
+    for section, keywords in experience_sources.items():
+        if section in cv_info:
+            section_text = cv_info[section]
+            all_experience.extend(_tokens_with_pos(nlp(section_text), ['NOUN', 'VERB']))
+            all_experience.extend(_keyword_hits(section_text, keywords))
+    # Hobby extraction
+    if 'hobbies' in cv_info:
+        all_hobbies.extend(_tokens_with_pos(nlp(cv_info['hobbies']), ['NOUN', 'VERB', 'ADJ']))
+    # Qualification extraction
+    if 'education' in cv_info:
+        education_text = cv_info['education']
+        all_qualifications.extend(_tokens_with_pos(nlp(education_text), ['NOUN', 'ADJ']))
+        qual_keywords = ["engineering", "diploma", "bachelor", "master", "degree", "computer science", "information technology", "business administration", "medical", "healthcare"]
+        all_qualifications.extend(_keyword_hits(education_text, qual_keywords))
+    # De-duplicate and lower-case; sorting makes the report order reproducible
+    # (plain set iteration order is not).
+    all_skills = _unique_terms(all_skills)
+    all_hobbies = _unique_terms(all_hobbies)
+    all_qualifications = _unique_terms(all_qualifications)
+    all_experience = _unique_terms(all_experience)
+    # The CV-side embeddings do not depend on the career, so compute them once
+    # here instead of once per career inside the loop.
+    cv_skill_embeddings = _encode_terms(all_skills)
+    cv_hobby_embeddings = _encode_terms(all_hobbies)
+    cv_qualifications_embeddings = _encode_terms(all_qualifications)
+    cv_experience_embeddings = _encode_terms(all_experience)
+    # Calculate similarity scores for each career recommendation
+    career_scores = []
+    for career in CAREER_RECOMMENDATIONS:
+        # Skills are compared term by term; the other sections are compared with
+        # a single embedding of the career's comma-joined skill list.
+        career_skill_embeddings = embedding_model.encode(career['skills'])
+        career_profile_embedding = embedding_model.encode([", ".join(career['skills'])])
+        skills_similarity = _best_similarity(career_skill_embeddings, cv_skill_embeddings)
+        hobby_similarity = _best_similarity(career_profile_embedding, cv_hobby_embeddings)
+        qualification_similarity = _best_similarity(career_profile_embedding, cv_qualifications_embeddings)
+        experience_similarity = _best_similarity(career_profile_embedding, cv_experience_embeddings)
+        # Weighted sum of similarities
+        total_similarity = (0.5*skills_similarity) + (0.1*hobby_similarity) + (0.2*qualification_similarity) + (0.2*experience_similarity)
+        career_scores.append({
+            'title': career['title'],
+            'description': career['description'],
+            'score': total_similarity,
+            'matched_skills': all_skills,
+            'matched_hobbies': all_hobbies,
+            'matched_qualifications': all_qualifications,
+            'matched_experience': all_experience
+        })
+    # Sort careers by similarity score, best first
+    ranked_careers = sorted(career_scores, key=lambda x: x['score'], reverse=True)
+    # Prepare recommendation report
+    report = "### Career Recommendation Analysis\n\n"
+    report += "**Top Career Recommendations**:\n"
+    for career in ranked_careers[:5]:  # Display top 5 recommendations
+        report += f"- **{career['title']}**\n"
+        report += f"  *{career['description']}*\n"
+        report += f"  *Similarity Score: {career['score']:.2f}*\n"
+    # The identified terms come from the CV alone, so they are the same for
+    # every career and are read from the local lists directly.
+    report += "\n**Skills Match**:\n"
+    report += "- Identified Skills: " + ", ".join(all_skills) + "\n\n"
+    report += "**Hobbies Match**:\n"
+    report += "- Identified Hobbies: " + ", ".join(all_hobbies) + "\n\n"
+    report += "**Qualification Match**:\n"
+    report += "- Identified Qualifications: " + ", ".join(all_qualifications) + "\n\n"
+    report += "**Experience Match**:\n"
+    report += "- Identified Experience: " + ", ".join(all_experience) + "\n\n"
+    return report
+def cv_skill_assessment(cv_file):
+    """
+    Main function to process an uploaded CV and provide a skill assessment
+    The file is first read as a PDF; if that yields no text it is read as a
+    UTF-8 text file instead.
+    Args:
+        cv_file (str): Path to uploaded CV file (None or empty when nothing
+            was uploaded)
+    Returns:
+        str: Skill assessment and career recommendations, or a message
+            starting with "Error processing CV:" when the file cannot be used
+    """
+    # Nothing to open: answer directly instead of surfacing a low-level
+    # open() error about an invalid path.
+    if not cv_file:
+        return "Error processing CV: no file was uploaded."
+    try:
+        # Extract text from PDF
+        cv_text = extract_text_from_pdf(cv_file)
+        # If PDF extraction yields nothing, try direct text processing
+        if not cv_text.strip():
+            try:
+                with open(cv_file, 'r', encoding='utf-8') as f:
+                    cv_text = f.read()
+            except UnicodeDecodeError:
+                # Binary content (e.g. a PDF with no extractable text) is not a
+                # text CV; say so rather than reporting a codec error.
+                return "Error processing CV: no readable text could be extracted from the file."
+        # An empty document would only produce an all-zero report.
+        if not cv_text.strip():
+            return "Error processing CV: the file contains no text."
+        # Analyze CV and get recommendations
+        return analyze_cv_skills(cv_text)
+    except Exception as e:
+        # UI boundary: always hand the interface a message, never a traceback.
+        return f"Error processing CV: {str(e)}"
+# Gradio front end
+def launch_cv_skill_assessment_app():
+    """
+    Build the Gradio interface around cv_skill_assessment and start serving it
+    """
+    cv_upload = gr.File(label="Upload Your CV (PDF/Text)", type="filepath")
+    report_view = gr.Markdown(label="Career Recommendation Report")
+    # Shown under the title; the text is passed to Gradio exactly as written.
+    app_description = """
+        Discover your ideal career path based on your CV!
+        *How to use*:
+        1. Upload your CV (PDF or Text file)
+        2. Our AI analyzes your skills, experience, and background
+        3. Receive personalized career recommendations
+        *Features*:
+        - Advanced CV parsing
+        - Skill extraction
+        - Domain-based career matching
+        - Detailed recommendation report
+        """
+    interface = gr.Interface(
+        fn=cv_skill_assessment,
+        inputs=cv_upload,
+        outputs=report_view,
+        title="🚀 CV Skills Assessment AI",
+        description=app_description,
+        theme="huggingface",
+    )
+    interface.launch(debug=True)
+# Run the application. This call sits at module top level with no guard, so it
+# executes whenever the file is run or imported.
+launch_cv_skill_assessment_app()