Spaces:
Sleeping
Sleeping
soojeongcrystal
commited on
Commit
โข
0352e69
1
Parent(s):
7b0fd85
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import matplotlib.pyplot as plt
|
|
7 |
import csv
|
8 |
import io
|
9 |
import matplotlib.font_manager as fm
|
|
|
10 |
|
11 |
# 한국어 처리를 위한 KoSentence-BERT 모델 로드
|
12 |
model = SentenceTransformer('jhgan/ko-sbert-sts')
|
@@ -14,15 +15,16 @@ model = SentenceTransformer('jhgan/ko-sbert-sts')
|
|
14 |
# 나눔바른고딕 폰트 설정 (허깅페이스 환경에 맞게 수정)
|
15 |
plt.rc('font', family='NanumBarunGothic')
|
16 |
|
17 |
-
# ์ ์ญ
|
18 |
global_recommendations = None
|
19 |
global_csv_string = None
|
|
|
20 |
|
21 |
# CSV 문자열 생성 함수
|
22 |
def create_csv_string(recommendations):
    """Render recommendation rows as CSV text.

    The output starts with a fixed three-column header row, followed by one
    CSV record per item of ``recommendations`` in iteration order.

    Args:
        recommendations: Iterable of row sequences to serialize.

    Returns:
        The complete CSV document as a single string.
    """
    rows = [["Employee ID", "Employee Name", "Recommended Programs"]]
    rows.extend(recommendations)

    buffer = io.StringIO()
    csv.writer(buffer).writerows(rows)
    return buffer.getvalue()
|
@@ -71,12 +73,40 @@ def validate_and_get_columns(employee_df, program_df):
|
|
71 |
|
72 |
return None, employee_cols, program_cols
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
# 직원 데이터를 분석하여 교육 프로그램을 추천하고, 테이블과 그래프를 생성하는 함수
|
75 |
-
def hybrid_rag(employee_file, program_file):
|
76 |
global global_recommendations
|
77 |
global global_csv_string
|
78 |
|
79 |
-
#
|
80 |
employee_df = pd.read_csv(employee_file.name)
|
81 |
program_df = pd.read_csv(program_file.name)
|
82 |
|
@@ -91,22 +121,38 @@ def hybrid_rag(employee_file, program_file):
|
|
91 |
|
92 |
similarities = cosine_similarity(employee_embeddings, program_embeddings)
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
recommendations = []
|
95 |
-
recommendation_rows = []
|
96 |
for i, employee in employee_df.iterrows():
|
97 |
recommended_programs = []
|
|
|
98 |
for j, program in program_df.iterrows():
|
99 |
if similarities[i][j] > 0.5:
|
100 |
recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
if recommended_programs:
|
103 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์ ์ถ์ฒ ํ๋ก๊ทธ๋จ: {', '.join(recommended_programs)}"
|
104 |
-
|
|
|
|
|
105 |
else:
|
106 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์๊ฒ ์ ํฉํ ํ๋ก๊ทธ๋จ์ด ์์ต๋๋ค."
|
107 |
-
|
|
|
|
|
108 |
|
109 |
-
recommendations.append(recommendation)
|
110 |
|
111 |
global_recommendations = recommendation_rows
|
112 |
|
@@ -129,7 +175,7 @@ def hybrid_rag(employee_file, program_file):
|
|
129 |
global_csv_string = create_csv_string(recommendation_rows)
|
130 |
|
131 |
# ๊ฒฐ๊ณผ ํ
์ด๋ธ ๋ฐ์ดํฐํ๋ ์ ์์ฑ
|
132 |
-
result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs"])
|
133 |
|
134 |
return result_df, chart_buffer, gr.File.update(visible=True)
|
135 |
|
@@ -141,7 +187,7 @@ def chat_response(message, history):
|
|
141 |
|
142 |
for employee in global_recommendations:
|
143 |
if employee[1].lower() in message.lower():
|
144 |
-
return f"{employee[1]}๋์๊ฒ ์ถ์ฒ๋ ํ๋ก๊ทธ๋จ์ ๋ค์๊ณผ ๊ฐ์ต๋๋ค: {employee[2]}"
|
145 |
|
146 |
return "์ฃ์กํฉ๋๋ค. ํด๋น ์ง์์ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. ๋ค๋ฅธ ์ง์ ์ด๋ฆ์ ์
๋ ฅํด์ฃผ์ธ์."
|
147 |
|
@@ -154,13 +200,23 @@ def download_csv():
|
|
154 |
|
155 |
# Gradio ๋ธ๋ก
|
156 |
with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
|
157 |
-
gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>๐ผ HybridRAG
|
158 |
|
159 |
with gr.Row():
|
160 |
with gr.Column(scale=1, min_width=300):
|
161 |
-
gr.Markdown("<h3 style='color: #34495e;'>1.
|
162 |
employee_file = gr.File(label="์ง์ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
163 |
program_file = gr.File(label="๊ต์ก ํ๋ก๊ทธ๋จ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
analyze_button = gr.Button("๋ถ์ ์์", elem_classes="gradio-button")
|
165 |
output_table = gr.DataFrame(label="๋ถ์ ๊ฒฐ๊ณผ (ํ
์ด๋ธ)")
|
166 |
csv_download = gr.File(label="์ถ์ฒ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋", visible=False)
|
@@ -169,13 +225,15 @@ with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .g
|
|
169 |
gr.Markdown("<h3 style='color: #34495e;'>2. ๋ถ์ ๊ฒฐ๊ณผ ๋ฐ ์๊ฐํ</h3>")
|
170 |
chart_output = gr.Image(label="์๊ฐํ ์ฐจํธ")
|
171 |
|
172 |
-
gr.Markdown("<h3 style='color: #34495e;'>3. ์ง์๋ณ ์ถ์ฒ ํ๋ก๊ทธ๋จ ํ์ธ</h3>")
|
173 |
chatbot = gr.Chatbot()
|
174 |
msg = gr.Textbox(label="์ง์ ์ด๋ฆ์ ์
๋ ฅํ์ธ์")
|
175 |
clear = gr.Button("๋ํ ๋ด์ญ ์ง์ฐ๊ธฐ")
|
176 |
|
177 |
# ๋ถ์ ๋ฒํผ ํด๋ฆญ ์ ํ
์ด๋ธ, ์ฐจํธ, ํ์ผ ๋ค์ด๋ก๋๋ฅผ ์
๋ฐ์ดํธ
|
178 |
-
analyze_button.click(hybrid_rag,
|
|
|
|
|
179 |
|
180 |
# CSV ๋ค์ด๋ก๋ ๋ฒํผ
|
181 |
csv_download.click(download_csv, inputs=[], outputs=[csv_download])
|
|
|
7 |
import csv
|
8 |
import io
|
9 |
import matplotlib.font_manager as fm
|
10 |
+
from datetime import datetime, timedelta
|
11 |
|
12 |
# 한국어 처리를 위한 KoSentence-BERT 모델 로드
|
13 |
model = SentenceTransformer('jhgan/ko-sbert-sts')
|
|
|
15 |
# 나눔바른고딕 폰트 설정 (허깅페이스 환경에 맞게 수정)
|
16 |
plt.rc('font', family='NanumBarunGothic')
|
17 |
|
18 |
+
# 전역 변수
|
19 |
global_recommendations = None
|
20 |
global_csv_string = None
|
21 |
+
youtube_columns = None
|
22 |
|
23 |
# CSV 문자열 생성 함수
|
24 |
def create_csv_string(recommendations):
    """Serialize recommendation rows to a CSV-formatted string.

    A fixed four-column header row is emitted first; every item of
    ``recommendations`` is then written as one CSV record, in order.

    Args:
        recommendations: Iterable of row sequences to serialize.

    Returns:
        The accumulated CSV text.
    """
    header = [
        "Employee ID",
        "Employee Name",
        "Recommended Programs",
        "Recommended YouTube Content",
    ]
    sink = io.StringIO()
    csv.writer(sink).writerows([header, *recommendations])
    return sink.getvalue()
|
|
|
73 |
|
74 |
return None, employee_cols, program_cols
|
75 |
|
76 |
+
# 유튜브 데이터 열 선택 함수
|
77 |
+
def select_youtube_columns(youtube_file):
    """Refresh the four YouTube column dropdowns from an uploaded CSV.

    The CSV is read, the frame and the required role names are handed to
    ``auto_match_columns`` (defined elsewhere in this file), and the mapping
    it returns is stored in the module-level ``youtube_columns``. One dropdown
    update is returned per role; each offers every CSV column as a choice and
    preselects whatever the mapping holds for that role (``None`` if absent).

    Args:
        youtube_file: Uploaded file object whose ``.name`` is the CSV path,
            or ``None`` when the upload has been cleared.

    Returns:
        A tuple of four ``gr.Dropdown.update`` results, in the order
        title, description, url, upload_date.
    """
    global youtube_columns
    required_youtube_cols = ["title", "description", "url", "upload_date"]

    # Clearing the upload delivers None; reset the dropdowns instead of
    # failing on ``None.name``.
    if youtube_file is None:
        youtube_columns = None
        return tuple(
            gr.Dropdown.update(choices=[], value=None)
            for _ in required_youtube_cols
        )

    youtube_df = pd.read_csv(youtube_file.name)
    youtube_columns = auto_match_columns(youtube_df, required_youtube_cols)

    available = youtube_df.columns.tolist()
    return tuple(
        gr.Dropdown.update(choices=available, value=youtube_columns.get(role))
        for role in required_youtube_cols
    )
|
88 |
+
|
89 |
+
# 유튜브 콘텐츠 데이터 로드 및 처리 함수
|
90 |
+
def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
    """Load the YouTube CSV and normalize it to four canonical columns.

    Only the four selected source columns are kept. They are renamed, in
    order, to ``title``, ``description``, ``url`` and ``upload_date``, and
    ``upload_date`` is parsed with ``pd.to_datetime``.

    Args:
        file_path: Path of the CSV file to read.
        title_col: Source column holding the video title.
        description_col: Source column holding the video description.
        url_col: Source column holding the video URL.
        upload_date_col: Source column holding the upload date.

    Returns:
        A new DataFrame with exactly the four canonical columns.

    Raises:
        KeyError: If a selected column is not present in the CSV.
    """
    selected = [title_col, description_col, url_col, upload_date_col]

    # Work on an explicit copy: writing into a bare column selection is the
    # pattern that triggers pandas' SettingWithCopyWarning.
    youtube_df = pd.read_csv(file_path)[selected].copy()
    youtube_df.columns = ['title', 'description', 'url', 'upload_date']
    youtube_df['upload_date'] = pd.to_datetime(youtube_df['upload_date'])
    return youtube_df
|
96 |
+
|
97 |
+
# 유튜브 콘텐츠와 교육 프로그램 매칭 함수
|
98 |
+
def match_youtube_content(program_skills, youtube_df, model):
    """Compute program-to-video similarity scores.

    The video descriptions and the program skill texts are each passed to
    ``model.encode``, and the two embedding sets are compared with
    ``cosine_similarity``; programs come first, so rows follow
    ``program_skills`` and columns follow the rows of ``youtube_df``.

    Args:
        program_skills: Sequence of skill texts, one per program.
        youtube_df: DataFrame providing a ``description`` column.
        model: Object exposing ``encode(list_of_str)``.

    Returns:
        The value returned by
        ``cosine_similarity(program_embeddings, youtube_embeddings)``.
    """
    # pandas reads empty CSV cells as float NaN, which is not text; replace
    # missing values with "" so every item handed to the encoder is a str.
    descriptions = youtube_df['description'].fillna("").astype(str).tolist()
    skills = pd.Series(list(program_skills), dtype=object).fillna("").astype(str).tolist()

    youtube_embeddings = model.encode(descriptions)
    program_embeddings = model.encode(skills)
    return cosine_similarity(program_embeddings, youtube_embeddings)
|
103 |
+
|
104 |
# 직원 데이터를 분석하여 교육 프로그램을 추천하고, 테이블과 그래프를 생성하는 함수
|
105 |
+
def hybrid_rag(employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col):
|
106 |
global global_recommendations
|
107 |
global global_csv_string
|
108 |
|
109 |
+
# ์ง์ ๋ฐ ํ๋ก๊ทธ๋จ ๋ฐ์ดํฐ ๋ก๋
|
110 |
employee_df = pd.read_csv(employee_file.name)
|
111 |
program_df = pd.read_csv(program_file.name)
|
112 |
|
|
|
121 |
|
122 |
similarities = cosine_similarity(employee_embeddings, program_embeddings)
|
123 |
|
124 |
+
# ์ ํ๋ธ ์ฝํ
์ธ ๋ก๋ ๋ฐ ์ฒ๋ฆฌ
|
125 |
+
youtube_df = load_youtube_content(youtube_file.name, title_col, description_col, url_col, upload_date_col)
|
126 |
+
|
127 |
+
# ์ ํ๋ธ ์ฝํ
์ธ ์ ๊ต์ก ํ๋ก๊ทธ๋จ ๋งค์นญ
|
128 |
+
youtube_similarities = match_youtube_content(program_df[program_cols['skills_acquired']].tolist(), youtube_df, model)
|
129 |
+
|
130 |
recommendations = []
|
131 |
+
recommendation_rows = []
|
132 |
for i, employee in employee_df.iterrows():
|
133 |
recommended_programs = []
|
134 |
+
recommended_youtube = []
|
135 |
for j, program in program_df.iterrows():
|
136 |
if similarities[i][j] > 0.5:
|
137 |
recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
|
138 |
+
|
139 |
+
# ํด๋น ํ๋ก๊ทธ๋จ๊ณผ ๊ฐ์ฅ ์ ์ฌํ ์ ํ๋ธ ์ฝํ
์ธ ์ฐพ๊ธฐ
|
140 |
+
top_youtube_indices = youtube_similarities[j].argsort()[-3:][::-1] # ์์ 3๊ฐ
|
141 |
+
for idx in top_youtube_indices:
|
142 |
+
recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
|
143 |
|
144 |
if recommended_programs:
|
145 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์ ์ถ์ฒ ํ๋ก๊ทธ๋จ: {', '.join(recommended_programs)}"
|
146 |
+
youtube_recommendation = f"์ถ์ฒ ์ ํ๋ธ ์ฝํ
์ธ : {', '.join(recommended_youtube)}"
|
147 |
+
recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
|
148 |
+
", ".join(recommended_programs), ", ".join(recommended_youtube)])
|
149 |
else:
|
150 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์๊ฒ ์ ํฉํ ํ๋ก๊ทธ๋จ์ด ์์ต๋๋ค."
|
151 |
+
youtube_recommendation = "์ถ์ฒํ ์ ํ๋ธ ์ฝํ
์ธ ๊ฐ ์์ต๋๋ค."
|
152 |
+
recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
|
153 |
+
"์ ํฉํ ํ๋ก๊ทธ๋จ ์์", "์ถ์ฒ ์ฝํ
์ธ ์์"])
|
154 |
|
155 |
+
recommendations.append(recommendation + "\n" + youtube_recommendation)
|
156 |
|
157 |
global_recommendations = recommendation_rows
|
158 |
|
|
|
175 |
global_csv_string = create_csv_string(recommendation_rows)
|
176 |
|
177 |
# ๊ฒฐ๊ณผ ํ
์ด๋ธ ๋ฐ์ดํฐํ๋ ์ ์์ฑ
|
178 |
+
result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
|
179 |
|
180 |
return result_df, chart_buffer, gr.File.update(visible=True)
|
181 |
|
|
|
187 |
|
188 |
for employee in global_recommendations:
|
189 |
if employee[1].lower() in message.lower():
|
190 |
+
return f"{employee[1]}๋์๊ฒ ์ถ์ฒ๋ ํ๋ก๊ทธ๋จ์ ๋ค์๊ณผ ๊ฐ์ต๋๋ค: {employee[2]}\n\n์ถ์ฒ ์ ํ๋ธ ์ฝํ
์ธ : {employee[3]}"
|
191 |
|
192 |
return "์ฃ์กํฉ๋๋ค. ํด๋น ์ง์์ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. ๋ค๋ฅธ ์ง์ ์ด๋ฆ์ ์
๋ ฅํด์ฃผ์ธ์."
|
193 |
|
|
|
200 |
|
201 |
# Gradio ๋ธ๋ก
|
202 |
with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
|
203 |
+
gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>๐ผ HybridRAG ์์คํ
(์ ํ๋ธ ์ฝํ
์ธ ํฌํจ)</h1>")
|
204 |
|
205 |
with gr.Row():
|
206 |
with gr.Column(scale=1, min_width=300):
|
207 |
+
gr.Markdown("<h3 style='color: #34495e;'>1. ๋ฐ์ดํฐ๋ฅผ ์
๋ก๋ํ์ธ์</h3>")
|
208 |
employee_file = gr.File(label="์ง์ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
209 |
program_file = gr.File(label="๊ต์ก ํ๋ก๊ทธ๋จ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
210 |
+
youtube_file = gr.File(label="์ ํ๋ธ ์ฝํ
์ธ ๋ฐ์ดํฐ ์
๋ก๋", interactive=True)
|
211 |
+
|
212 |
+
gr.Markdown("<h4 style='color: #34495e;'>์ ํ๋ธ ๋ฐ์ดํฐ ์ด ์ ํ</h4>")
|
213 |
+
title_col = gr.Dropdown(label="์ ๋ชฉ ์ด")
|
214 |
+
description_col = gr.Dropdown(label="์ค๋ช
์ด")
|
215 |
+
url_col = gr.Dropdown(label="URL ์ด")
|
216 |
+
upload_date_col = gr.Dropdown(label="์
๋ก๋ ๋ ์ง ์ด")
|
217 |
+
|
218 |
+
youtube_file.change(select_youtube_columns, inputs=[youtube_file], outputs=[title_col, description_col, url_col, upload_date_col])
|
219 |
+
|
220 |
analyze_button = gr.Button("๋ถ์ ์์", elem_classes="gradio-button")
|
221 |
output_table = gr.DataFrame(label="๋ถ์ ๊ฒฐ๊ณผ (ํ
์ด๋ธ)")
|
222 |
csv_download = gr.File(label="์ถ์ฒ ๊ฒฐ๊ณผ ๋ค์ด๋ก๋", visible=False)
|
|
|
225 |
gr.Markdown("<h3 style='color: #34495e;'>2. ๋ถ์ ๊ฒฐ๊ณผ ๋ฐ ์๊ฐํ</h3>")
|
226 |
chart_output = gr.Image(label="์๊ฐํ ์ฐจํธ")
|
227 |
|
228 |
+
gr.Markdown("<h3 style='color: #34495e;'>3. ์ง์๋ณ ์ถ์ฒ ํ๋ก๊ทธ๋จ ๋ฐ ์ ํ๋ธ ์ฝํ
์ธ ํ์ธ</h3>")
|
229 |
chatbot = gr.Chatbot()
|
230 |
msg = gr.Textbox(label="์ง์ ์ด๋ฆ์ ์
๋ ฅํ์ธ์")
|
231 |
clear = gr.Button("๋ํ ๋ด์ญ ์ง์ฐ๊ธฐ")
|
232 |
|
233 |
# ๋ถ์ ๋ฒํผ ํด๋ฆญ ์ ํ
์ด๋ธ, ์ฐจํธ, ํ์ผ ๋ค์ด๋ก๋๋ฅผ ์
๋ฐ์ดํธ
|
234 |
+
analyze_button.click(hybrid_rag,
|
235 |
+
inputs=[employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col],
|
236 |
+
outputs=[output_table, chart_output, csv_download])
|
237 |
|
238 |
# CSV ๋ค์ด๋ก๋ ๋ฒํผ
|
239 |
csv_download.click(download_csv, inputs=[], outputs=[csv_download])
|