Anupam251272 committed on
Commit
bac66fb
·
verified ·
1 Parent(s): 4e4c085

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +310 -0
app.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import spacy
4
+ import nltk
5
+ import re
6
+ import PyPDF2
7
+ import numpy as np
8
+ import pandas as pd
9
+ from transformers import pipeline
10
+ from sentence_transformers import SentenceTransformer
11
+ from sklearn.metrics.pairwise import cosine_similarity
12
+
13
# Download necessary NLTK resources
# NOTE(review): nothing in the visible code calls into nltk after this
# download; confirm it is still needed before removing it.
nltk.download('punkt')

# Check for GPU availability first so the embedding model can be placed on
# exactly the device that is reported below.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load spaCy and Sentence Transformer models
nlp = spacy.load('en_core_web_sm')
embedding_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

print(f"Running on: {device}")
23
+
24
+
25
# Career database, kept as compact (title, description, skills) rows and
# expanded into the dict records the scoring code reads.
_CAREER_ROWS = (
    ("Software Engineer", "Develops software applications and systems",
     ("Python", "Java", "C++", "JavaScript", "Software Development",
      "Database Management", "Web Development", "Cloud Computing",
      "Data Structures", "Algorithms")),
    ("Data Scientist", "Analyzes complex data to help make business decisions",
     ("Python", "R", "Statistics", "Machine Learning", "Data Visualization",
      "Data Analysis", "SQL")),
    ("Cloud Solutions Architect", "Designs and manages cloud computing strategies",
     ("Cloud Computing", "AWS", "Azure", "GCP", "Infrastructure as Code",
      "Networking")),
    ("AI/ML Engineer", "Creates intelligent systems and machine learning models",
     ("Machine Learning", "Deep Learning", "Neural Networks", "TensorFlow",
      "PyTorch", "Computer Vision", "Natural Language Processing")),
    ("Database Administrator", "Manage databases, ensure data security",
     ("SQL", "Database Management", "Database Security", "Database Design",
      "Database Modeling")),
    ("Mechanical Engineer", "Designs, develops, and tests mechanical devices and systems",
     ("CAD", "CAM", "Matlab", "Mechanical Design", "Manufacturing Engineering",
      "Quality Control", "Thermal Engineering", "Fluid Mechanics", "GD&T",
      "Engineering Drawings", "Blueprint reading", "Product Design",
      "FEA Analysis")),
    ("Manufacturing Engineer", "Optimizes manufacturing processes for efficiency and quality",
     ("Manufacturing Engineering", "Process Optimization", "Lean Manufacturing",
      "Six Sigma", "Production Planning", "Supply Chain Management")),
    ("Quality Engineer", "Oversees quality assurance activities and ensures products meet standards.",
     ("Quality Control", "Quality Assurance", "ISO Standards",
      "Statistical Process Control", "Inspection", "Testing")),
    ("Design Engineer", "Creates product designs and technical drawings using CAD software",
     ("CAD", "CAM", "Product Design", "3D Modeling", "Engineering Design",
      "Drafting")),
    ("Business Analyst", "Identifies business needs and determines solutions",
     ("Business Analysis", "Requirements Gathering", "Data Analysis",
      "Process Improvement", "Project Management")),
    ("Marketing Manager", "Develops and implements marketing strategies",
     ("Marketing", "Digital Marketing", "Social Media Marketing",
      "Market Research", "Branding", "Advertising", "Content Marketing")),
    ("Project Manager", "Leads and coordinates project teams and resources",
     ("Project Management", "Project Planning", "Risk Management",
      "Team Management", "Agile Methodologies")),
    ("Management Consultant", "Advises organizations on improving performance",
     ("Consulting", "Strategy", "Problem Solving", "Business Analysis",
      "Communication")),
    ("Graphic Designer", "Creates visual concepts using computer software or by hand",
     ("Graphic Design", "Adobe Photoshop", "Adobe Illustrator", "UI/UX Design",
      "Visual Communication", "Branding")),
    ("Content Strategist", "Develops content plans and marketing strategies",
     ("Content Writing", "Content Strategy", "SEO", "Content Marketing",
      "Copywriting")),
    ("UI/UX Designer", "Designs user interfaces for digital products",
     ("UI Design", "UX Design", "Wireframing", "Prototyping", "User Research",
      "Interaction Design")),
    ("Digital Marketing Specialist", "Promotes brands and products through digital channels",
     ("Digital Marketing", "Social Media Marketing", "SEO", "PPC Advertising",
      "Email Marketing", "Content Marketing")),
    ("Healthcare Administrator", "Manages healthcare facilities and services",
     ("Healthcare Administration", "Healthcare Management", "Healthcare Policy",
      "Healthcare Finance", "Patient Care")),
    ("Medical Researcher", "Conducts research to improve medical knowledge",
     ("Medical Research", "Data Analysis", "Research Design",
      "Laboratory Techniques", "Scientific Writing")),
    ("Healthcare Consultant", "Advises healthcare organizations on improvement strategies",
     ("Healthcare Consulting", "Healthcare Strategy", "Healthcare Operations",
      "Healthcare Policy")),
    ("Medical Assistant", "Assists with patient care and medical administrative tasks.",
     ("Patient Care", "Medical Terminology", "Medical Assisting",
      "Clinical Procedures", "Vital Signs", "Electronic Health Records")),
)

# Each record carries a title, a one-line description and a list of skills.
CAREER_RECOMMENDATIONS = [
    {"title": row_title, "description": row_description, "skills": list(row_skills)}
    for row_title, row_description, row_skills in _CAREER_ROWS
]
49
+
50
def extract_text_from_pdf(file_path):
    """
    Extract text from PDF file

    Args:
        file_path (str): Path to the PDF file

    Returns:
        str: Text of every page, each page followed by a newline. An empty
            string is returned (and the error printed) when the file cannot
            be opened or parsed.
    """
    try:
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # `or ''` guards against a page that yields no text object, so a
            # single such page cannot discard the text of the whole document.
            return ''.join(
                (page.extract_text() or '') + '\n' for page in reader.pages
            )
    except Exception as e:
        # Deliberately best-effort: any failure is reported and mapped to "".
        print(f"Error extracting PDF text: {e}")
        return ""
70
# Line-leading words that terminate the section currently being captured.
_SECTION_STOP_WORDS = (
    'education', 'work', 'experience', 'skills', 'projects',
    'training', 'hobbies', 'personal', 'declaration',
)

# (section key, heading alternation, stop words belonging to the section itself)
_SECTION_SPECS = (
    ('education', 'education|qualification|academic', ('education',)),
    ('experience', 'experience|work', ('experience', 'work')),
    ('skills', 'skills|expertise|technical skills', ('skills',)),
    ('projects', 'projects', ('projects',)),
    ('training', 'training|certification', ('training',)),
    ('hobbies', 'hobbies|interests', ('hobbies',)),
    ('personal', 'personal details', ('personal',)),
)


def _build_section_patterns():
    """Compile one (heading, body) regex per CV section."""
    # Contact details are captured per line: heading word, then the rest of the line.
    patterns = {'contact': re.compile(r'(email|phone|contact)[:\s]*([^\n]+)')}
    for key, headings, own_words in _SECTION_SPECS:
        stops = '|'.join(w for w in _SECTION_STOP_WORDS if w not in own_words)
        # A section body ends at a blank line, at a line starting with another
        # section's heading word, or at the end of the text (\Z). Without \Z
        # the last section of a CV that has no trailing blank line is lost.
        patterns[key] = re.compile(
            rf'({headings})[:\s]*(.*?)(?=\n\n|\n(?:{stops})|\Z)',
            re.DOTALL | re.IGNORECASE,
        )
    return patterns


_SECTION_PATTERNS = _build_section_patterns()


def preprocess_cv_text(text):
    """
    Preprocess CV text for analysis

    The text is lowercased and split into sections by heading keywords.
    Every body captured for a section is stripped and the non-empty ones are
    joined with single spaces.

    Args:
        text (str): Raw CV text

    Returns:
        dict: Maps a section key ('contact', 'education', 'experience',
            'skills', 'projects', 'training', 'hobbies', 'personal') to its
            lowercased text. Sections with no captured text are omitted;
            empty or missing input yields an empty dict.
    """
    if not text:
        return {}

    # Normalize text
    text = text.lower()

    processed_sections = {}
    for key, pattern in _SECTION_PATTERNS.items():
        # findall yields (heading, body) tuples; only the body is kept.
        fragments = [body.strip() for _heading, body in pattern.findall(text)]
        fragments = [fragment for fragment in fragments if fragment]
        if fragments:
            processed_sections[key] = " ".join(fragments)

    return processed_sections
102
+
103
# Keyword vocabularies looked up (case-insensitively, as substrings) in the
# corresponding CV section.
# NOTE: terms of one or two characters (e.g. "R") are discarded later by
# _normalize_terms, so they never reach the scoring step.
_SKILL_KEYWORDS = (
    "AutoCAD", "Manufacturing Engineering", "Quality Control", "Thermal Engineering", "Heat Transfer",
    "Machine Design", "Fluid Mechanics", "CAD", "CAM", "Matlab", "GD&T", "Engineering Drawings",
    "Blueprint reading", "Product Design", "FEA Analysis",
    "Project Management", "Marketing", "Business Analysis", "Sales", "Finance", "Consulting", "Market Research",
    "Graphic Design", "Content Writing", "Digital Marketing", "UI/UX Design", "Video Production", "SEO",
    "Social Media Marketing",
    "Patient Care", "Medical Research", "Healthcare Administration", "Medical Technology", "Anatomy",
    "Physiology", "Pharmacology", "Python", "Java", "Machine Learning", "Data Science", "Cloud Computing",
    "Cybersecurity", "Web Development", "Software Development", "Database Management",
    "SQL", "C++", "JavaScript", "AWS", "Azure", "GCP", "Infrastructure as Code", "Networking",
    "Deep Learning", "Neural Networks", "TensorFlow", "PyTorch", "Computer Vision",
    "Natural Language Processing", "R", "Statistics", "Data Visualization", "Data Analysis",
    "Agile Methodologies",
    "Adobe Photoshop", "Adobe Illustrator", "Visual Communication", "Branding", "Copywriting",
    "Wireframing", "Prototyping", "User Research", "Interaction Design", "PPC Advertising",
    "Email Marketing", "Healthcare Management", "Healthcare Policy", "Healthcare Finance",
    "Medical Terminology", "Clinical Procedures", "Vital Signs", "Electronic Health Records",
    "Lean Manufacturing", "Six Sigma", "Production Planning", "Supply Chain Management",
    "ISO Standards", "Statistical Process Control", "Inspection", "Testing",
    "Requirements Gathering", "Process Improvement",
)
_EXPERIENCE_KEYWORDS = (
    "blueprints", "specifications", "production", "inspection", "testing", "measurement", "calipers",
    "gauges", "micrometers", "quality standards", "production process", "finished items",
    "inspection results", "test data", "training", "design", "development", "analysis", "management",
    "research", "consulting",
)
_PROJECT_KEYWORDS = (
    "helicopter", "assembly", "dismantling", "5S methodology", "flow path", "material",
    "productivity", "layout",
)
_TRAINING_KEYWORDS = ("inplant training",)
_QUALIFICATION_KEYWORDS = (
    "engineering", "diploma", "bachelor", "master", "degree", "computer science",
    "information technology", "business administration", "medical", "healthcare",
)


def _section_terms(section_text, pos_tags, keywords=(), entity_labels=()):
    """Collect candidate terms from one CV section: entities, POS-filtered tokens, keyword hits."""
    doc = nlp(section_text)
    terms = [ent.text for ent in doc.ents if ent.label_ in entity_labels]
    terms.extend(token.text for token in doc if token.pos_ in pos_tags)
    lowered = section_text.lower()
    terms.extend(keyword for keyword in keywords if keyword.lower() in lowered)
    return terms


def _normalize_terms(terms):
    """Lowercase, drop terms of two characters or fewer, de-duplicate, and sort."""
    # Sorting makes the report (and the encoding order) stable between runs;
    # a bare set would be ordered by randomized string hashes.
    return sorted({term.lower() for term in terms if len(term) > 2})


def _encode_terms(terms):
    """Embed a term list, or return None when there is nothing to embed."""
    return embedding_model.encode(terms) if terms else None


def _best_similarity(reference_embeddings, cv_embeddings):
    """Return the highest pairwise cosine similarity, or 0 when the CV side is empty."""
    if cv_embeddings is None or len(cv_embeddings) == 0:
        return 0
    return np.max(cosine_similarity(reference_embeddings, cv_embeddings))


def analyze_cv_skills(cv_text):
    """
    Analyze skills from CV and recommend career paths based on combined scores.

    The CV is split into sections with preprocess_cv_text, terms are pulled
    from each section, and every entry of CAREER_RECOMMENDATIONS is scored
    as a weighted sum of best-match cosine similarities:
    0.5 skills + 0.1 hobbies + 0.2 qualifications + 0.2 experience.
    Projects and training sections contribute to the experience terms.

    Args:
        cv_text (str): Raw CV text

    Returns:
        str: Markdown report listing the five best-scoring careers followed
            by the skills, hobbies, qualifications and experience terms that
            were identified in the CV.
    """
    # Preprocess CV
    cv_info = preprocess_cv_text(cv_text)

    # Skill extraction
    skill_terms = []
    if 'skills' in cv_info:
        skill_terms = _section_terms(
            cv_info['skills'], ('NOUN', 'ADJ'), _SKILL_KEYWORDS,
            entity_labels=('SKILL', 'ORG', 'PRODUCT'),
        )

    # Experience, project and training extraction all feed the experience terms
    experience_terms = []
    for section, keywords in (
        ('experience', _EXPERIENCE_KEYWORDS),
        ('projects', _PROJECT_KEYWORDS),
        ('training', _TRAINING_KEYWORDS),
    ):
        if section in cv_info:
            experience_terms.extend(
                _section_terms(cv_info[section], ('NOUN', 'VERB'), keywords)
            )

    # Hobby extraction
    hobby_terms = []
    if 'hobbies' in cv_info:
        hobby_terms = _section_terms(cv_info['hobbies'], ('NOUN', 'VERB', 'ADJ'))

    # Qualification extraction
    qualification_terms = []
    if 'education' in cv_info:
        qualification_terms = _section_terms(
            cv_info['education'], ('NOUN', 'ADJ'), _QUALIFICATION_KEYWORDS
        )

    # Remove duplicates and convert to lowercase
    all_skills = _normalize_terms(skill_terms)
    all_hobbies = _normalize_terms(hobby_terms)
    all_qualifications = _normalize_terms(qualification_terms)
    all_experience = _normalize_terms(experience_terms)

    # The CV-side embeddings do not depend on the career, so encode them once
    # instead of once per career.
    cv_skill_embeddings = _encode_terms(all_skills)
    cv_hobby_embeddings = _encode_terms(all_hobbies)
    cv_qualification_embeddings = _encode_terms(all_qualifications)
    cv_experience_embeddings = _encode_terms(all_experience)
    needs_profile = any(
        embeddings is not None
        for embeddings in (
            cv_hobby_embeddings, cv_qualification_embeddings, cv_experience_embeddings
        )
    )

    # Calculate similarity scores for each career recommendation
    career_scores = []
    for career in CAREER_RECOMMENDATIONS:
        skills_similarity = 0
        hobby_similarity = 0
        qualification_similarity = 0
        experience_similarity = 0

        # Skills are compared term by term against the career's skill list
        if cv_skill_embeddings is not None:
            career_skill_embeddings = embedding_model.encode(career['skills'])
            skills_similarity = _best_similarity(
                career_skill_embeddings, cv_skill_embeddings
            )

        # The other facets are compared against the career's skills joined
        # into a single profile string, embedded once per career.
        if needs_profile:
            career_profile_embedding = embedding_model.encode(
                [", ".join(career['skills'])]
            )
            hobby_similarity = _best_similarity(
                career_profile_embedding, cv_hobby_embeddings
            )
            qualification_similarity = _best_similarity(
                career_profile_embedding, cv_qualification_embeddings
            )
            experience_similarity = _best_similarity(
                career_profile_embedding, cv_experience_embeddings
            )

        # Calculate weighted sum of similarities
        total_similarity = (0.5*skills_similarity) + (0.1*hobby_similarity) + (0.2*qualification_similarity) + (0.2*experience_similarity)
        career_scores.append({
            'title': career['title'],
            'description': career['description'],
            'score': total_similarity,
        })

    # Sort careers by similarity score (stable, so ties keep database order)
    ranked_careers = sorted(career_scores, key=lambda x: x['score'], reverse=True)

    # Prepare recommendation report
    parts = [
        "### Career Recommendation Analysis\n\n",
        "**Top Career Recommendations**:\n",
    ]
    for career in ranked_careers[:5]:  # Display top 5 recommendations
        parts.append(f"- **{career['title']}**\n")
        parts.append(f" *{career['description']}*\n")
        parts.append(f" *Similarity Score: {career['score']:.2f}*\n")

    parts.append("\n**Skills Match**:\n")
    parts.append("- Identified Skills: " + ", ".join(all_skills) + "\n\n")

    parts.append("**Hobbies Match**:\n")
    parts.append("- Identified Hobbies: " + ", ".join(all_hobbies) + "\n\n")

    parts.append("**Qualification Match**:\n")
    parts.append("- Identified Qualifications: " + ", ".join(all_qualifications) + "\n\n")

    parts.append("**Experience Match**:\n")
    parts.append("- Identified Experience: " + ", ".join(all_experience) + "\n\n")

    return "".join(parts)
252
+
253
def cv_skill_assessment(cv_file):
    """
    Main function to process uploaded CV and provide skill assessment

    The file is first read as a PDF. If that yields no text, it is read as
    plain text instead, unless it carries a PDF signature, in which case a
    message about the missing text layer is returned.

    Args:
        cv_file (str): Path to uploaded CV file

    Returns:
        str: Skill assessment and career recommendations, or a message
            starting with "Error processing CV:" when the file is missing,
            unreadable, or contains no text.
    """
    # No upload: answer with a clear message instead of a low-level TypeError.
    if not cv_file:
        return "Error processing CV: no file was uploaded."

    try:
        # Extract text from PDF
        cv_text = extract_text_from_pdf(cv_file)

        # If PDF extraction fails, try direct text processing
        if not cv_text.strip():
            with open(cv_file, 'rb') as f:
                looks_like_pdf = b'%PDF-' in f.read(1024)
            if looks_like_pdf:
                # Decoding PDF bytes as text would only produce noise.
                return (
                    "Error processing CV: no extractable text found in the PDF "
                    "(it may be a scanned or image-only document)."
                )
            # errors='replace' keeps CVs saved in a non-UTF-8 encoding usable.
            with open(cv_file, 'r', encoding='utf-8', errors='replace') as f:
                cv_text = f.read()

        if not cv_text.strip():
            return "Error processing CV: the uploaded file contains no text."

        # Analyze CV and get recommendations
        return analyze_cv_skills(cv_text)

    except Exception as e:
        return f"Error processing CV: {str(e)}"
279
+
280
+ # Create Gradio Interface
281
def launch_cv_skill_assessment_app():
    """
    Assemble the Gradio interface around cv_skill_assessment and start it.

    The interface takes one uploaded file (handed to the callback as a file
    path) and renders the returned report as Markdown.
    """
    # Blurb shown under the title, assembled line by line.
    description_text = (
        "\n"
        "Discover your ideal career path based on your CV!\n"
        "\n"
        "*How to use*:\n"
        "1. Upload your CV (PDF or Text file)\n"
        "2. Our AI analyzes your skills, experience, and background\n"
        "3. Receive personalized career recommendations\n"
        "\n"
        "*Features*:\n"
        "- Advanced CV parsing\n"
        "- Skill extraction\n"
        "- Domain-based career matching\n"
        "- Detailed recommendation report\n"
    )

    # Input and output widgets
    cv_upload = gr.File(label="Upload Your CV (PDF/Text)", type="filepath")
    report_view = gr.Markdown(label="Career Recommendation Report")

    interface_options = {
        "fn": cv_skill_assessment,
        "inputs": cv_upload,
        "outputs": report_view,
        "title": "🚀 CV Skills Assessment AI",
        "description": description_text,
        "theme": "huggingface",
    }
    demo = gr.Interface(**interface_options)

    demo.launch(debug=True)
308
+
309
# Run the application only when this file is executed as a script, so that
# importing the module (for example from a test) does not start the server.
if __name__ == "__main__":
    launch_cv_skill_assessment_app()