trangannh commited on
Commit
06da8b9
1 Parent(s): 44a80a6

Update job_recommendation_inference.py

Browse files
Files changed (1) hide show
  1. job_recommendation_inference.py +74 -57
job_recommendation_inference.py CHANGED
@@ -1,58 +1,75 @@
1
- import torch
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
-
4
- # Đường dẫn tới mô hình trên Hugging Face Hub
5
- model_name = "trangannh/ptit-job-recommendation"
6
-
7
- # Khởi tạo tokenizer và mô hình từ tên mô hình trên Hugging Face
8
- tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
10
-
11
- # Hàm dự đoán công việc dựa trên kỹ năng cứng, kỹ năng mềm và ngành nghề
12
- def recommend_jobs(input_hard_skills, input_soft_skills, input_major, top_n=3):
13
- # Chuẩn bị input cho hình
14
- inputs = {
15
- "hard_skills": input_hard_skills,
16
- "soft_skills": input_soft_skills,
17
- "major": input_major
18
- }
19
-
20
- # Tiền xử hóa input
21
- encoded_input = tokenizer.encode_plus(
22
- inputs["hard_skills"], # Kỹ năng cứng
23
- inputs["soft_skills"], # Kỹ năng mềm
24
- inputs["major"], # Ngành nghề
25
- add_special_tokens=True,
26
- return_tensors="pt"
27
- )
28
-
29
- # Dự đoán
30
- with torch.no_grad():
31
- outputs = model(**encoded_input)
32
-
33
- # Lấy giá trị dự đoán và sắp xếp theo thứ tự giảm dần
34
- logits = outputs.logits[0].tolist()
35
- sorted_indices = sorted(range(len(logits)), key=lambda k: logits[k], reverse=True)
36
-
37
- # Lấy top N công việc gợi ý
38
- recommended_jobs = []
39
- for i in range(min(top_n, len(sorted_indices))):
40
- job_index = sorted_indices[i]
41
- recommended_jobs.append(tokenizer.decode(job_index))
42
-
43
- return recommended_jobs
44
-
45
- # Hướng dẫn sử dụng
46
  if __name__ == "__main__":
47
- # Input từ người dùng ( thể làm thay đổi để phù hợp với nhu cầu thực tế)
48
- input_hard_skills = input("Nhập kỹ năng cứng của bạn: ")
49
- input_soft_skills = input("Nhập kỹ năng mềm của bạn: ")
50
- input_major = input("Nhập ngành nghề của bạn: ")
51
-
52
- # Gợi ý công việc
53
- recommended_jobs = recommend_jobs(input_hard_skills, input_soft_skills, input_major)
54
-
55
- # In kết quả
56
- print(f"Các công việc được gợi ý cho bạn:")
57
- for i, job in enumerate(recommended_jobs, start=1):
58
- print(f"{i}. {job}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+
6
+ # Load data (assuming your data loading and preprocessing are handled elsewhere)
7
+
8
def recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major,
                                    companies, tfidf_vectorizer_skills,
                                    tfidf_vectorizer_majors, companies_skills_vec,
                                    companies_majors_vec, top_n=3):
    """Recommend the top-N best-matching companies for a candidate profile.

    Vectorizes the candidate's skills and major with the pre-fitted TF-IDF
    vectorizers, scores every company by the equal-weight average of skill
    cosine similarity and major cosine similarity, and returns the ``Major``
    values of the highest-scoring rows of ``companies``.

    Args:
        input_hard_skills: Free-text hard-skill description of the candidate.
        input_soft_skills: Free-text soft-skill description of the candidate.
        input_major: Free-text major/field of the candidate.
        companies: DataFrame with one row per company; must contain a
            'Major' column, which is used as the recommendation label.
        tfidf_vectorizer_skills: TfidfVectorizer already fitted on skill text.
        tfidf_vectorizer_majors: TfidfVectorizer already fitted on major text.
        companies_skills_vec: TF-IDF matrix of company skills
            (rows assumed aligned with ``companies`` — TODO confirm upstream).
        companies_majors_vec: TF-IDF matrix of company majors.
        top_n: Number of recommendations to return. Defaults to 3, matching
            the previously hard-coded behavior.

    Returns:
        numpy array of up to ``top_n`` 'Major' strings, best match first.
    """
    input_hard_skills_vec = tfidf_vectorizer_skills.transform([input_hard_skills])
    input_soft_skills_vec = tfidf_vectorizer_skills.transform([input_soft_skills])
    input_major_vec = tfidf_vectorizer_majors.transform([input_major])

    # Average hard and soft skill vectors into a single skills profile.
    input_skills_vec = (input_hard_skills_vec + input_soft_skills_vec) / 2

    # Row vector of similarities against every company (shape 1 x n_companies).
    skills_similarity = cosine_similarity(input_skills_vec, companies_skills_vec)
    major_similarity = cosine_similarity(input_major_vec, companies_majors_vec)

    # Defensive guard: if the two company matrices disagree on row count
    # (e.g. skills were vectorized as separate hard/soft rows upstream),
    # truncate both to the common prefix so they can be averaged element-wise.
    # NOTE(review): truncation silently drops companies — the real fix is to
    # align the matrices where they are built, one row per company.
    if skills_similarity.shape[1] != major_similarity.shape[1]:
        min_dim = min(skills_similarity.shape[1], major_similarity.shape[1])
        skills_similarity = skills_similarity[:, :min_dim]
        major_similarity = major_similarity[:, :min_dim]

    # Equal-weight blend of skill match and major match.
    combined_similarity = (skills_similarity + major_similarity) / 2

    # Rank companies by descending combined score and take the top N.
    sorted_company_indices = np.argsort(-combined_similarity[0])
    recommended_companies = companies.iloc[sorted_company_indices]['Major'].values[:top_n]

    return recommended_companies
34
+
35
+ # Example usage if run as a script
 
 
 
 
 
 
 
 
 
 
36
if __name__ == "__main__":
    # Load applicant and company data (adjust paths to your data location).
    users_data = "1st_train.csv"
    applicants = pd.read_csv(users_data)

    jobs_data = "jobs_data.csv"
    companies = pd.read_csv(jobs_data)

    # Fit one vectorizer over all skill text and one over all major text so
    # candidate and company vectors live in the same TF-IDF space.
    tfidf_vectorizer_skills = TfidfVectorizer()
    tfidf_vectorizer_majors = TfidfVectorizer()

    all_skills = pd.concat([applicants['final_hard_skill'], applicants['final_soft_skill'],
                            companies['final_hard_skill'], companies['final_soft_skill']])
    all_majors = pd.concat([applicants['candidate_field'], companies['Major']])

    all_skills_vectorized = tfidf_vectorizer_skills.fit_transform(all_skills)
    all_majors_vectorized = tfidf_vectorizer_majors.fit_transform(all_majors)

    num_applicants = len(applicants)
    num_companies = len(companies)

    # Rows of all_skills_vectorized are ordered: applicant hard skills,
    # applicant soft skills, company hard skills, company soft skills.
    applicants_skills_vectorized = all_skills_vectorized[:num_applicants * 2]

    # BUG FIX: company hard- and soft-skill rows were previously kept as
    # 2 * num_companies separate rows, which misaligned with the
    # per-company majors matrix (num_companies rows) and triggered silent
    # truncation in the recommender. Average each company's hard and soft
    # vectors into ONE row per company, mirroring how the candidate's
    # input skills are averaged inside recommend_jobs_for_input_skills.
    companies_hard_vec = all_skills_vectorized[
        num_applicants * 2:num_applicants * 2 + num_companies]
    companies_soft_vec = all_skills_vectorized[
        num_applicants * 2 + num_companies:]
    companies_skills_vectorized = (companies_hard_vec + companies_soft_vec) / 2

    applicants_majors_vectorized = all_majors_vectorized[:num_applicants]
    companies_majors_vectorized = all_majors_vectorized[num_applicants:]

    # Example input
    input_hard_skills = "Business, Finance, Excel"
    input_soft_skills = "Communication, Teamwork"
    input_major = "Marketing"

    recommended_jobs = recommend_jobs_for_input_skills(
        input_hard_skills, input_soft_skills, input_major, companies,
        tfidf_vectorizer_skills, tfidf_vectorizer_majors,
        companies_skills_vectorized, companies_majors_vectorized)
    print("Recommended Jobs based on input skills and major:")
    print(recommended_jobs)