Spaces:
Sleeping
Sleeping
soojeongcrystal
committed on
Commit
•
65f9910
1
Parent(s):
5ad04e8
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,6 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
5 |
import networkx as nx
|
6 |
import matplotlib.pyplot as plt
|
7 |
import csv
|
8 |
-
import datetime
|
9 |
import io
|
10 |
|
11 |
# Sentence-BERT 모델 로드
|
@@ -24,51 +23,95 @@ def save_recommendations_to_csv(recommendations):
|
|
24 |
output.seek(0)
|
25 |
return output
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# μ§μ λ°μ΄ν°λ₯Ό λΆμνμ¬ κ΅μ‘ νλ‘κ·Έλ¨μ μΆμ²νκ³ κ·Έλνλ₯Ό 그리λ ν¨μ
|
28 |
def analyze_data(employee_file, program_file):
|
29 |
-
# μ§μ λ°μ΄ν°μ
|
30 |
employee_df = pd.read_csv(employee_file.name)
|
31 |
program_df = pd.read_csv(program_file.name)
|
32 |
|
33 |
-
#
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
36 |
employee_embeddings = model.encode(employee_skills)
|
37 |
program_embeddings = model.encode(program_skills)
|
38 |
|
39 |
# μ μ¬λ κ³μ°
|
40 |
similarities = cosine_similarity(employee_embeddings, program_embeddings)
|
41 |
|
42 |
-
# μ§μλ³ μΆμ² νλ‘κ·Έλ¨
|
43 |
recommendations = []
|
44 |
-
recommendation_rows = [] # CSV
|
45 |
for i, employee in employee_df.iterrows():
|
46 |
recommended_programs = []
|
47 |
for j, program in program_df.iterrows():
|
48 |
-
if similarities[i][j] > 0.5: # μ μ¬λ μκ³κ°
|
49 |
-
recommended_programs.append(f"{program['program_name']} ({program['duration']})")
|
50 |
|
51 |
if recommended_programs:
|
52 |
-
recommendation = f"μ§μ {employee['employee_name']}μ μΆμ² νλ‘κ·Έλ¨: {', '.join(recommended_programs)}"
|
53 |
-
recommendation_rows.append([employee['employee_id'], employee['employee_name'], ", ".join(recommended_programs)])
|
54 |
else:
|
55 |
-
recommendation = f"μ§μ {employee['employee_name']}μκ² μ ν©ν νλ‘κ·Έλ¨μ΄ μμ΅λλ€."
|
56 |
-
recommendation_rows.append([employee['employee_id'], employee['employee_name'], "μ ν©ν νλ‘κ·Έλ¨ μμ"])
|
57 |
|
58 |
recommendations.append(recommendation)
|
59 |
|
60 |
# λ€νΈμν¬ κ·Έλν μμ±
|
61 |
G = nx.Graph()
|
62 |
-
for employee in employee_df['employee_name']:
|
63 |
G.add_node(employee, type='employee')
|
64 |
|
65 |
-
for program in program_df['program_name']:
|
66 |
G.add_node(program, type='program')
|
67 |
|
68 |
for i, employee in employee_df.iterrows():
|
69 |
for j, program in program_df.iterrows():
|
70 |
-
if similarities[i][j] > 0.5:
|
71 |
-
G.add_edge(employee['employee_name'], program['program_name'])
|
72 |
|
73 |
# κ·Έλν μκ°ν
|
74 |
plt.figure(figsize=(10, 8))
|
@@ -84,18 +127,18 @@ def analyze_data(employee_file, program_file):
|
|
84 |
|
85 |
# Gradio λΈλ‘
|
86 |
with gr.Blocks(css=".gradio-button {background-color: #6c757d; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
|
87 |
-
gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>πΌ HybridRAG μμ€ν
</h1>"
|
88 |
|
89 |
with gr.Row():
|
90 |
-
with gr.Column(scale=1):
|
91 |
gr.Markdown("<h3 style='color: #34495e;'>1. μ§μ λ° νλ‘κ·Έλ¨ λ°μ΄ν°λ₯Ό μ
λ‘λνμΈμ</h3>")
|
92 |
employee_file = gr.File(label="μ§μ λ°μ΄ν° μ
λ‘λ", interactive=True)
|
93 |
program_file = gr.File(label="κ΅μ‘ νλ‘κ·Έλ¨ λ°μ΄ν° μ
λ‘λ", interactive=True)
|
94 |
analyze_button = gr.Button("λΆμ μμ", elem_classes="gradio-button")
|
95 |
output_text = gr.Textbox(label="λΆμ κ²°κ³Ό", interactive=False, elem_classes="gradio-textbox")
|
96 |
|
97 |
-
with gr.Column(scale=2):
|
98 |
-
gr.Markdown("<h3 style='color: #34495e;'>2. λΆμ
|
99 |
chart_output = gr.Plot(label="μκ°ν μ°¨νΈ")
|
100 |
csv_download = gr.File(label="μΆμ² κ²°κ³Ό λ€μ΄λ‘λ")
|
101 |
|
|
|
5 |
import networkx as nx
|
6 |
import matplotlib.pyplot as plt
|
7 |
import csv
|
|
|
8 |
import io
|
9 |
|
10 |
# Sentence-BERT 모델 로드
|
|
|
23 |
output.seek(0)
|
24 |
return output
|
25 |
|
26 |
+
# 자동으로 열을 매칭하는 함수
|
27 |
+
def auto_match_columns(df, required_cols):
    """Map each required column name to the best-matching column label of *df*.

    Matching is case-insensitive and ignores surrounding whitespace in the
    labels. For every required name an exact match is preferred; only when
    none exists does the first label that merely *contains* the required name
    win (e.g. ``"Employee ID (employee_id)"`` for ``"employee_id"``).

    Args:
        df: Object exposing an iterable ``columns`` attribute (a pandas
            DataFrame in this application).
        required_cols: Iterable of required column names.

    Returns:
        dict mapping every entry of ``required_cols`` to the original
        (un-normalised) column label that matched, or to ``None`` when no
        label matched.
    """
    # Normalise each label once. str() keeps non-string labels (for example
    # the integer labels of a header-less CSV) from raising on .lower().
    normalized = [(col, str(col).strip().lower()) for col in df.columns]

    matched_cols = {}
    for req_col in required_cols:
        needle = str(req_col).strip().lower()

        # An exact hit must beat an earlier column that only contains the
        # required name as a substring.
        match = next((col for col, norm in normalized if norm == needle), None)
        if match is None:
            # Fallback: first label containing the required name.
            match = next((col for col, norm in normalized if needle in norm), None)

        matched_cols[req_col] = match
    return matched_cols
|
41 |
+
|
42 |
+
# 직원 및 프로그램 데이터의 열을 자동으로 매칭하거나, 선택하게 하는 함수
|
43 |
+
def validate_and_get_columns(employee_df, program_df):
    """Resolve the columns the analysis needs in both uploaded tables.

    Each table is passed to ``auto_match_columns`` together with its list of
    required column names. The employee table is checked first, then the
    program table; the first required column without a match produces an
    error.

    Args:
        employee_df: Employee table; must provide columns matching
            ``employee_id``, ``employee_name`` and ``current_skills``.
        program_df: Training-program table; must provide columns matching
            ``program_name``, ``skills_acquired`` and ``duration``.

    Returns:
        A 3-tuple ``(error_msg, employee_cols, program_cols)``:
        ``(None, dict, dict)`` on success, where each dict maps a required
        name to the matched column label, or ``(str, None, None)`` when a
        required column could not be matched.
    """
    # Required columns per table.
    required_employee_cols = ["employee_id", "employee_name", "current_skills"]
    required_program_cols = ["program_name", "skills_acquired", "duration"]

    # Attempt automatic matching.
    employee_cols = auto_match_columns(employee_df, required_employee_cols)
    program_cols = auto_match_columns(program_df, required_program_cols)

    # One pass over both tables, employee table first. The label is the
    # Korean table name used in the user-facing message
    # ("직원" = employee, "프로그램" = program).
    for table_label, mapping in (("직원", employee_cols), ("프로그램", program_cols)):
        for key, value in mapping.items():
            if value is None:
                # Message: "Could not select column '<key>' in the <table>
                # data. Please select the correct column."
                return (
                    f"{table_label} 데이터에서 '{key}' 열을 선택할 수 없습니다. "
                    "올바른 열을 선택하세요.",
                    None,
                    None,
                )

    # Every required column was matched.
    return None, employee_cols, program_cols
|
64 |
+
|
65 |
# μ§μ λ°μ΄ν°λ₯Ό λΆμνμ¬ κ΅μ‘ νλ‘κ·Έλ¨μ μΆμ²νκ³ κ·Έλνλ₯Ό 그리λ ν¨μ
|
66 |
def analyze_data(employee_file, program_file):
|
67 |
+
# μ§μ λ°μ΄ν°μ νλ‘κ·Έλ¨ λ°μ΄ν° μ½κΈ°
|
68 |
employee_df = pd.read_csv(employee_file.name)
|
69 |
program_df = pd.read_csv(program_file.name)
|
70 |
|
71 |
+
# μ΄ μλ λ§€μΉ μλ
|
72 |
+
error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
|
73 |
+
if error_msg:
|
74 |
+
return error_msg, None, None
|
75 |
+
|
76 |
+
# μ§μμ μλκ³Ό νλ‘κ·Έλ¨ λͺ©νλ₯Ό 벑ν°ν
|
77 |
+
employee_skills = employee_df[employee_cols["current_skills"]].tolist()
|
78 |
+
program_skills = program_df[program_cols["skills_acquired"]].tolist()
|
79 |
employee_embeddings = model.encode(employee_skills)
|
80 |
program_embeddings = model.encode(program_skills)
|
81 |
|
82 |
# μ μ¬λ κ³μ°
|
83 |
similarities = cosine_similarity(employee_embeddings, program_embeddings)
|
84 |
|
85 |
+
# μ§μλ³ μΆμ² νλ‘κ·Έλ¨ μμ±
|
86 |
recommendations = []
|
87 |
+
recommendation_rows = [] # CSVλ‘ μ μ₯ν λ°μ΄ν°
|
88 |
for i, employee in employee_df.iterrows():
|
89 |
recommended_programs = []
|
90 |
for j, program in program_df.iterrows():
|
91 |
+
if similarities[i][j] > 0.5: # μ μ¬λ μκ³κ°
|
92 |
+
recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
|
93 |
|
94 |
if recommended_programs:
|
95 |
+
recommendation = f"μ§μ {employee[employee_cols['employee_name']]}μ μΆμ² νλ‘κ·Έλ¨: {', '.join(recommended_programs)}"
|
96 |
+
recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']], ", ".join(recommended_programs)])
|
97 |
else:
|
98 |
+
recommendation = f"μ§μ {employee[employee_cols['employee_name']]}μκ² μ ν©ν νλ‘κ·Έλ¨μ΄ μμ΅λλ€."
|
99 |
+
recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']], "μ ν©ν νλ‘κ·Έλ¨ μμ"])
|
100 |
|
101 |
recommendations.append(recommendation)
|
102 |
|
103 |
# λ€νΈμν¬ κ·Έλν μμ±
|
104 |
G = nx.Graph()
|
105 |
+
for employee in employee_df[employee_cols['employee_name']]:
|
106 |
G.add_node(employee, type='employee')
|
107 |
|
108 |
+
for program in program_df[program_cols['program_name']]:
|
109 |
G.add_node(program, type='program')
|
110 |
|
111 |
for i, employee in employee_df.iterrows():
|
112 |
for j, program in program_df.iterrows():
|
113 |
+
if similarities[i][j] > 0.5:
|
114 |
+
G.add_edge(employee[employee_cols['employee_name']], program[program_cols['program_name']])
|
115 |
|
116 |
# κ·Έλν μκ°ν
|
117 |
plt.figure(figsize=(10, 8))
|
|
|
127 |
|
128 |
# Gradio λΈλ‘
|
129 |
with gr.Blocks(css=".gradio-button {background-color: #6c757d; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
|
130 |
+
gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>πΌ HybridRAG μμ€ν
</h1>")
|
131 |
|
132 |
with gr.Row():
|
133 |
+
with gr.Column(scale=1, min_width=300):
|
134 |
gr.Markdown("<h3 style='color: #34495e;'>1. μ§μ λ° νλ‘κ·Έλ¨ λ°μ΄ν°λ₯Ό μ
λ‘λνμΈμ</h3>")
|
135 |
employee_file = gr.File(label="μ§μ λ°μ΄ν° μ
λ‘λ", interactive=True)
|
136 |
program_file = gr.File(label="κ΅μ‘ νλ‘κ·Έλ¨ λ°μ΄ν° μ
λ‘λ", interactive=True)
|
137 |
analyze_button = gr.Button("λΆμ μμ", elem_classes="gradio-button")
|
138 |
output_text = gr.Textbox(label="λΆμ κ²°κ³Ό", interactive=False, elem_classes="gradio-textbox")
|
139 |
|
140 |
+
with gr.Column(scale=2, min_width=500):
|
141 |
+
gr.Markdown("<h3 style='color: #34495e;'>2. λΆμ κ²°κ³Ό λ° μκ°ν</h3>")
|
142 |
chart_output = gr.Plot(label="μκ°ν μ°¨νΈ")
|
143 |
csv_download = gr.File(label="μΆμ² κ²°κ³Ό λ€μ΄λ‘λ")
|
144 |
|