trangannh committed on
Commit
59d0bec
1 Parent(s): b921b90

Update job_recommendation_inference.py

Browse files
Files changed (1) hide show
  1. job_recommendation_inference.py +15 -25
job_recommendation_inference.py CHANGED
@@ -2,52 +2,45 @@ import numpy as np
2
  import pandas as pd
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
-
6
- # Load data
7
 
8
  def recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, companies, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec):
9
  input_hard_skills_vec = tfidf_vectorizer_skills.transform([input_hard_skills])
10
  input_soft_skills_vec = tfidf_vectorizer_skills.transform([input_soft_skills])
11
  input_major_vec = tfidf_vectorizer_majors.transform([input_major])
12
 
13
- # Average the vectorized hard and soft skills
14
  input_skills_vec = (input_hard_skills_vec + input_soft_skills_vec) / 2
15
 
16
- # Compute similarities
17
  skills_similarity = cosine_similarity(input_skills_vec, companies_skills_vec)
18
  major_similarity = cosine_similarity(input_major_vec, companies_majors_vec)
19
 
20
- # Ensure the number of companies in both similarities is aligned
21
  if skills_similarity.shape[1] != major_similarity.shape[1]:
22
  min_dim = min(skills_similarity.shape[1], major_similarity.shape[1])
23
  skills_similarity = skills_similarity[:, :min_dim]
24
  major_similarity = major_similarity[:, :min_dim]
25
 
26
- # Combine similarities
27
  combined_similarity = (skills_similarity + major_similarity) / 2
28
 
29
- # Get top 3 job recommendations
30
  sorted_company_indices = np.argsort(-combined_similarity[0])
31
  recommended_companies = companies.iloc[sorted_company_indices]['Major'].values[:3]
32
 
33
  return recommended_companies
34
 
35
- # Example usage if run as a script
36
if __name__ == "__main__":
    # Example usage when the module is run as a script.
    # Adjust the two paths below to the actual data location.
    users_data = "1st_train.csv"
    applicants = pd.read_csv(users_data)

    jobs_data = "jobs_data.csv"
    companies = pd.read_csv(jobs_data)

    def _as_text(column):
        # TfidfVectorizer rejects NaN documents; treat missing cells as empty.
        return column.fillna("").astype(str)

    tfidf_vectorizer_skills = TfidfVectorizer()
    tfidf_vectorizer_majors = TfidfVectorizer()

    # Fit the vectorizers on applicant and company text together so both
    # sides share one vocabulary. Row order of `all_skills`:
    # applicant hard, applicant soft, company hard, company soft.
    all_skills = pd.concat([_as_text(applicants['final_hard_skill']), _as_text(applicants['final_soft_skill']),
                            _as_text(companies['final_hard_skill']), _as_text(companies['final_soft_skill'])])
    all_majors = pd.concat([_as_text(applicants['candidate_field']), _as_text(companies['Major'])])

    all_skills_vectorized = tfidf_vectorizer_skills.fit_transform(all_skills)
    all_majors_vectorized = tfidf_vectorizer_majors.fit_transform(all_majors)

    num_applicants = len(applicants)
    num_companies = len(companies)

    # The company part holds 2 * num_companies rows (hard block, then soft
    # block). Average the two blocks so there is exactly one skill vector per
    # company, aligned with the rows of `companies` and with the major
    # matrix. Passing the raw slice made the callee truncate it and silently
    # ignore every company soft-skill vector.
    company_skill_rows = all_skills_vectorized[num_applicants * 2:]
    companies_skills_vectorized = (
        company_skill_rows[:num_companies] + company_skill_rows[num_companies:]
    ) / 2

    companies_majors_vectorized = all_majors_vectorized[num_applicants:]

    # Example input
    input_hard_skills = "Business, Finance, Excel"
    input_soft_skills = "Communication, Teamwork"
    input_major = "Marketing"

    recommended_jobs = recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, companies, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vectorized, companies_majors_vectorized)
    print("Recommended Jobs based on input skills and major:")
    print(recommended_jobs)
 
 
 
 
2
  import pandas as pd
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
+ import json
 
6
 
7
  def recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, companies, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec):
8
  input_hard_skills_vec = tfidf_vectorizer_skills.transform([input_hard_skills])
9
  input_soft_skills_vec = tfidf_vectorizer_skills.transform([input_soft_skills])
10
  input_major_vec = tfidf_vectorizer_majors.transform([input_major])
11
 
 
12
  input_skills_vec = (input_hard_skills_vec + input_soft_skills_vec) / 2
13
 
 
14
  skills_similarity = cosine_similarity(input_skills_vec, companies_skills_vec)
15
  major_similarity = cosine_similarity(input_major_vec, companies_majors_vec)
16
 
 
17
  if skills_similarity.shape[1] != major_similarity.shape[1]:
18
  min_dim = min(skills_similarity.shape[1], major_similarity.shape[1])
19
  skills_similarity = skills_similarity[:, :min_dim]
20
  major_similarity = major_similarity[:, :min_dim]
21
 
 
22
  combined_similarity = (skills_similarity + major_similarity) / 2
23
 
 
24
  sorted_company_indices = np.argsort(-combined_similarity[0])
25
  recommended_companies = companies.iloc[sorted_company_indices]['Major'].values[:3]
26
 
27
  return recommended_companies
28
 
29
def handler(event, context):
    """Handle one recommendation request and return an HTTP-style response.

    The ``(event, context)`` signature and the ``statusCode``/``body``
    response shape appear to follow the AWS Lambda proxy convention.

    Args:
        event: Mapping whose ``'body'`` entry is a JSON string with the keys
            ``input_hard_skills``, ``input_soft_skills`` and ``input_major``
            (all strings).
        context: Unused.

    Returns:
        ``{'statusCode': 200, 'body': <JSON list of recommended majors>}`` on
        success, or ``{'statusCode': 400, 'body': <JSON error object>}`` when
        the request body is missing, is not valid JSON, lacks a required key,
        or carries a non-string value.
    """
    # Validate the request before doing any expensive work.
    try:
        input_data = json.loads(event['body'])
        input_hard_skills = input_data["input_hard_skills"]
        input_soft_skills = input_data["input_soft_skills"]
        input_major = input_data["input_major"]
    except (KeyError, TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError; a None/non-dict event or body
        # surfaces as TypeError; an absent key as KeyError.
        return {
            'statusCode': 400,
            'body': json.dumps({'error': 'Invalid request body: ' + str(exc)})
        }

    query_fields = (input_hard_skills, input_soft_skills, input_major)
    if not all(isinstance(value, str) for value in query_fields):
        return {
            'statusCode': 400,
            'body': json.dumps({'error': 'Invalid request body: all input fields must be strings'})
        }

    # NOTE: placeholder paths; the data is re-read and the vectorizers are
    # re-fitted on every invocation.
    users_data = "/path/to/your/1st_train.csv"
    applicants = pd.read_csv(users_data)

    jobs_data = "/path/to/your/jobs_data.csv"
    companies = pd.read_csv(jobs_data)

    def _as_text(column):
        # TfidfVectorizer rejects NaN documents; treat missing cells as empty.
        return column.fillna("").astype(str)

    tfidf_vectorizer_skills = TfidfVectorizer()
    tfidf_vectorizer_majors = TfidfVectorizer()

    # Fit on applicant and company text together so both sides share one
    # vocabulary. Row order of `all_skills`:
    # applicant hard, applicant soft, company hard, company soft.
    all_skills = pd.concat([_as_text(applicants['final_hard_skill']), _as_text(applicants['final_soft_skill']),
                            _as_text(companies['final_hard_skill']), _as_text(companies['final_soft_skill'])])
    all_majors = pd.concat([_as_text(applicants['candidate_field']), _as_text(companies['Major'])])

    all_skills_vectorized = tfidf_vectorizer_skills.fit_transform(all_skills)
    all_majors_vectorized = tfidf_vectorizer_majors.fit_transform(all_majors)

    num_applicants = len(applicants)
    num_companies = len(companies)

    # The company part holds 2 * num_companies rows (hard block, then soft
    # block). Average the two blocks so there is exactly one skill vector per
    # company, aligned with the rows of `companies` and with the major
    # matrix. Passing the raw slice made the callee truncate it and silently
    # ignore every company soft-skill vector.
    company_skill_rows = all_skills_vectorized[num_applicants * 2:]
    companies_skills_vectorized = (
        company_skill_rows[:num_companies] + company_skill_rows[num_companies:]
    ) / 2

    companies_majors_vectorized = all_majors_vectorized[num_applicants:]

    recommended_jobs = recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, companies, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vectorized, companies_majors_vectorized)

    return {
        'statusCode': 200,
        'body': json.dumps(recommended_jobs.tolist())
    }