soojeongcrystal committed on
Commit
0352e69
•
1 Parent(s): 7b0fd85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -14
app.py CHANGED
@@ -7,6 +7,7 @@ import matplotlib.pyplot as plt
7
  import csv
8
  import io
9
  import matplotlib.font_manager as fm
 
10
 
11
  # 한국어 처리를 위한 KoSentence-BERT 모델 로드
12
  model = SentenceTransformer('jhgan/ko-sbert-sts')
@@ -14,15 +15,16 @@ model = SentenceTransformer('jhgan/ko-sbert-sts')
14
  # 나눔바른고딕 폰트 설정 (허깅페이스 환경에 맞게 수정)
15
  plt.rc('font', family='NanumBarunGothic')
16
 
17
- # 전역 변수로 분석 결과 저장
18
  global_recommendations = None
19
  global_csv_string = None
 
20
 
21
  # CSV 문자열 생성 함수
22
  def create_csv_string(recommendations):
23
  output = io.StringIO()
24
  writer = csv.writer(output)
25
- writer.writerow(["Employee ID", "Employee Name", "Recommended Programs"])
26
  for rec in recommendations:
27
  writer.writerow(rec)
28
  return output.getvalue()
@@ -71,12 +73,40 @@ def validate_and_get_columns(employee_df, program_df):
71
 
72
  return None, employee_cols, program_cols
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  # ์ง์› ๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ํ•˜์—ฌ ๊ต์œก ํ”„๋กœ๊ทธ๋žจ์„ ์ถ”์ฒœํ•˜๊ณ , ํ…Œ์ด๋ธ”๊ณผ ๊ทธ๋ž˜ํ”„๋ฅผ ์ƒ์„ฑํ•˜๋Š” ํ•จ์ˆ˜
75
- def hybrid_rag(employee_file, program_file):
76
  global global_recommendations
77
  global global_csv_string
78
 
79
- # 1. VectorRAG: KoSentence-BERT๋ฅผ ์ด์šฉํ•œ ์œ ์‚ฌ๋„ ๊ณ„์‚ฐ
80
  employee_df = pd.read_csv(employee_file.name)
81
  program_df = pd.read_csv(program_file.name)
82
 
@@ -91,22 +121,38 @@ def hybrid_rag(employee_file, program_file):
91
 
92
  similarities = cosine_similarity(employee_embeddings, program_embeddings)
93
 
 
 
 
 
 
 
94
  recommendations = []
95
- recommendation_rows = [] # ํ…Œ์ด๋ธ” ๋ฐ CSV๋กœ ์ €์žฅํ•  ๋ฐ์ดํ„ฐ
96
  for i, employee in employee_df.iterrows():
97
  recommended_programs = []
 
98
  for j, program in program_df.iterrows():
99
  if similarities[i][j] > 0.5:
100
  recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
 
 
 
 
 
101
 
102
  if recommended_programs:
103
  recommendation = f"์ง์› {employee[employee_cols['employee_name']]}์˜ ์ถ”์ฒœ ํ”„๋กœ๊ทธ๋žจ: {', '.join(recommended_programs)}"
104
- recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']], ", ".join(recommended_programs)])
 
 
105
  else:
106
  recommendation = f"์ง์› {employee[employee_cols['employee_name']]}์—๊ฒŒ ์ ํ•ฉํ•œ ํ”„๋กœ๊ทธ๋žจ์ด ์—†์Šต๋‹ˆ๋‹ค."
107
- recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']], "์ ํ•ฉํ•œ ํ”„๋กœ๊ทธ๋žจ ์—†์Œ"])
 
 
108
 
109
- recommendations.append(recommendation)
110
 
111
  global_recommendations = recommendation_rows
112
 
@@ -129,7 +175,7 @@ def hybrid_rag(employee_file, program_file):
129
  global_csv_string = create_csv_string(recommendation_rows)
130
 
131
  # 결과 테이블 데이터프레임 생성
132
- result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs"])
133
 
134
  return result_df, chart_buffer, gr.File.update(visible=True)
135
 
@@ -141,7 +187,7 @@ def chat_response(message, history):
141
 
142
  for employee in global_recommendations:
143
  if employee[1].lower() in message.lower():
144
- return f"{employee[1]}๋‹˜์—๊ฒŒ ์ถ”์ฒœ๋œ ํ”„๋กœ๊ทธ๋žจ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {employee[2]}"
145
 
146
  return "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ํ•ด๋‹น ์ง์›์˜ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๋‹ค๋ฅธ ์ง์› ์ด๋ฆ„์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
147
 
@@ -154,13 +200,23 @@ def download_csv():
154
 
155
  # Gradio 블록
156
  with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
157
- gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>๐Ÿ’ผ HybridRAG ์‹œ์Šคํ…œ</h1>")
158
 
159
  with gr.Row():
160
  with gr.Column(scale=1, min_width=300):
161
- gr.Markdown("<h3 style='color: #34495e;'>1. ์ง์› ๋ฐ ํ”„๋กœ๊ทธ๋žจ ๋ฐ์ดํ„ฐ๋ฅผ ์—…๋กœ๋“œํ•˜์„ธ์š”</h3>")
162
  employee_file = gr.File(label="์ง์› ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ", interactive=True)
163
  program_file = gr.File(label="๊ต์œก ํ”„๋กœ๊ทธ๋žจ ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ", interactive=True)
 
 
 
 
 
 
 
 
 
 
164
  analyze_button = gr.Button("๋ถ„์„ ์‹œ์ž‘", elem_classes="gradio-button")
165
  output_table = gr.DataFrame(label="๋ถ„์„ ๊ฒฐ๊ณผ (ํ…Œ์ด๋ธ”)")
166
  csv_download = gr.File(label="์ถ”์ฒœ ๊ฒฐ๊ณผ ๋‹ค์šด๋กœ๋“œ", visible=False)
@@ -169,13 +225,15 @@ with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .g
169
  gr.Markdown("<h3 style='color: #34495e;'>2. ๋ถ„์„ ๊ฒฐ๊ณผ ๋ฐ ์‹œ๊ฐํ™”</h3>")
170
  chart_output = gr.Image(label="์‹œ๊ฐํ™” ์ฐจํŠธ")
171
 
172
- gr.Markdown("<h3 style='color: #34495e;'>3. ์ง์›๋ณ„ ์ถ”์ฒœ ํ”„๋กœ๊ทธ๋žจ ํ™•์ธ</h3>")
173
  chatbot = gr.Chatbot()
174
  msg = gr.Textbox(label="์ง์› ์ด๋ฆ„์„ ์ž…๋ ฅํ•˜์„ธ์š”")
175
  clear = gr.Button("๋Œ€ํ™” ๋‚ด์—ญ ์ง€์šฐ๊ธฐ")
176
 
177
  # 분석 버튼 클릭 시 테이블, 차트, 파일 다운로드를 업데이트
178
- analyze_button.click(hybrid_rag, inputs=[employee_file, program_file], outputs=[output_table, chart_output, csv_download])
 
 
179
 
180
  # CSV 다운로드 버튼
181
  csv_download.click(download_csv, inputs=[], outputs=[csv_download])
 
7
  import csv
8
  import io
9
  import matplotlib.font_manager as fm
10
+ from datetime import datetime, timedelta
11
 
12
  # 한국어 처리를 위한 KoSentence-BERT 모델 로드
13
  model = SentenceTransformer('jhgan/ko-sbert-sts')
 
15
  # 나눔바른고딕 폰트 설정 (허깅페이스 환경에 맞게 수정)
16
  plt.rc('font', family='NanumBarunGothic')
17
 
18
+ # 전역 변수
19
  global_recommendations = None
20
  global_csv_string = None
21
+ youtube_columns = None
22
 
# Build the downloadable recommendations CSV as an in-memory string.
def create_csv_string(recommendations):
    """Serialize recommendation rows to CSV text.

    Args:
        recommendations: Iterable of row sequences. Each row is written
            unchanged beneath the fixed four-column header.

    Returns:
        The whole CSV document (header row first) as one string.
    """
    header = ["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"]
    buffer = io.StringIO()
    csv_writer = csv.writer(buffer)
    # Header and data rows go out in a single call, in the same order as
    # writing them one by one.
    csv_writer.writerows([header, *recommendations])
    return buffer.getvalue()
 
73
 
74
  return None, employee_cols, program_cols
75
 
# Select the YouTube data columns.
def select_youtube_columns(youtube_file):
    """Fill the four YouTube column dropdowns from an uploaded CSV.

    Reads the uploaded file, asks ``auto_match_columns`` for a best-guess
    mapping of the required fields to the file's columns, stores that mapping
    in the module-level ``youtube_columns``, and returns one dropdown update
    per required field (title, description, url, upload_date, in that order).

    Args:
        youtube_file: Upload object exposing the file path as ``.name``, or
            ``None`` when no file is present.

    Returns:
        A 4-tuple of ``gr.Dropdown.update(...)`` values. Every dropdown
        offers all columns of the CSV and is pre-selected with the
        auto-matched column (``None`` when no match was found).
    """
    global youtube_columns
    required_youtube_cols = ["title", "description", "url", "upload_date"]

    # The change event can deliver None (e.g. the upload was cleared); there
    # is nothing to read then, so reset the state instead of crashing on
    # ``None.name``.
    if youtube_file is None:
        youtube_columns = None
        return tuple(
            gr.Dropdown.update(choices=[], value=None)
            for _ in required_youtube_cols
        )

    youtube_df = pd.read_csv(youtube_file.name)
    # The result is read with ``.get`` below, so it is expected to be a
    # mapping of required field name -> matched CSV column.
    youtube_columns = auto_match_columns(youtube_df, required_youtube_cols)

    # Compute the column list once; every dropdown offers the same choices.
    available_columns = youtube_df.columns.tolist()
    return tuple(
        gr.Dropdown.update(choices=available_columns, value=youtube_columns.get(field))
        for field in required_youtube_cols
    )
88
+
# Load and normalize the YouTube content data.
def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
    """Read the YouTube CSV and return it with canonical column names.

    Args:
        file_path: Path or file-like object, passed straight to
            ``pd.read_csv``.
        title_col: Name of the CSV column holding the video title.
        description_col: Name of the CSV column holding the description.
        url_col: Name of the CSV column holding the video URL.
        upload_date_col: Name of the CSV column holding the upload date.

    Returns:
        A DataFrame with exactly the columns ``title``, ``description``,
        ``url`` and ``upload_date`` (in that order). Missing text cells are
        empty strings and ``upload_date`` is parsed to datetimes.

    Raises:
        KeyError: If a selected column is not present in the CSV.
        ValueError: If ``upload_date`` values cannot be parsed by
            ``pd.to_datetime``.
    """
    selected_cols = [title_col, description_col, url_col, upload_date_col]
    # Explicit copy: the subset is renamed and written to below, which must
    # not be an ambiguous view of the frame returned by read_csv.
    youtube_df = pd.read_csv(file_path)[selected_cols].copy()
    youtube_df.columns = ['title', 'description', 'url', 'upload_date']

    # Empty CSV cells are read as NaN (a float). Replace them in the text
    # columns so downstream string formatting and sentence encoding always
    # receive str values.
    text_cols = ['title', 'description', 'url']
    youtube_df[text_cols] = youtube_df[text_cols].fillna("")

    youtube_df['upload_date'] = pd.to_datetime(youtube_df['upload_date'])
    return youtube_df
96
+
# Match YouTube content against the training programs.
def match_youtube_content(program_skills, youtube_df, model):
    """Score every training program against every YouTube video.

    Args:
        program_skills: Sequence of per-program skill texts.
        youtube_df: DataFrame with a ``description`` column.
        model: Object whose ``encode`` method turns a list of strings into
            embeddings accepted by ``cosine_similarity``.

    Returns:
        The ``cosine_similarity`` result for (program embeddings, video
        embeddings); the caller indexes it as ``[program_index]`` to rank
        videos for a program.
    """
    # Cells missing in the source CSVs arrive as NaN (a float), which is not
    # valid text for the encoder; normalise both inputs to plain strings,
    # with "" standing in for missing values.
    descriptions = youtube_df['description'].fillna("").astype(str).tolist()
    skills = ["" if pd.isna(skill) else str(skill) for skill in program_skills]

    youtube_embeddings = model.encode(descriptions)
    program_embeddings = model.encode(skills)
    return cosine_similarity(program_embeddings, youtube_embeddings)
103
+
104
  # ์ง์› ๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ํ•˜์—ฌ ๊ต์œก ํ”„๋กœ๊ทธ๋žจ์„ ์ถ”์ฒœํ•˜๊ณ , ํ…Œ์ด๋ธ”๊ณผ ๊ทธ๋ž˜ํ”„๋ฅผ ์ƒ์„ฑํ•˜๋Š” ํ•จ์ˆ˜
105
+ def hybrid_rag(employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col):
106
  global global_recommendations
107
  global global_csv_string
108
 
109
+ # ์ง์› ๋ฐ ํ”„๋กœ๊ทธ๋žจ ๋ฐ์ดํ„ฐ ๋กœ๋“œ
110
  employee_df = pd.read_csv(employee_file.name)
111
  program_df = pd.read_csv(program_file.name)
112
 
 
121
 
122
  similarities = cosine_similarity(employee_embeddings, program_embeddings)
123
 
124
+ # 유튜브 콘텐츠 로드 및 처리
125
+ youtube_df = load_youtube_content(youtube_file.name, title_col, description_col, url_col, upload_date_col)
126
+
127
+ # 유튜브 콘텐츠와 교육 프로그램 매칭
128
+ youtube_similarities = match_youtube_content(program_df[program_cols['skills_acquired']].tolist(), youtube_df, model)
129
+
130
  recommendations = []
131
+ recommendation_rows = []
132
  for i, employee in employee_df.iterrows():
133
  recommended_programs = []
134
+ recommended_youtube = []
135
  for j, program in program_df.iterrows():
136
  if similarities[i][j] > 0.5:
137
  recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
138
+
139
+ # 해당 프로그램과 가장 유사한 유튜브 콘텐츠 찾기
140
+ top_youtube_indices = youtube_similarities[j].argsort()[-3:][::-1] # ์ƒ์œ„ 3๊ฐœ
141
+ for idx in top_youtube_indices:
142
+ recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
143
 
144
  if recommended_programs:
145
  recommendation = f"์ง์› {employee[employee_cols['employee_name']]}์˜ ์ถ”์ฒœ ํ”„๋กœ๊ทธ๋žจ: {', '.join(recommended_programs)}"
146
+ youtube_recommendation = f"์ถ”์ฒœ ์œ ํŠœ๋ธŒ ์ฝ˜ํ…์ธ : {', '.join(recommended_youtube)}"
147
+ recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
148
+ ", ".join(recommended_programs), ", ".join(recommended_youtube)])
149
  else:
150
  recommendation = f"์ง์› {employee[employee_cols['employee_name']]}์—๊ฒŒ ์ ํ•ฉํ•œ ํ”„๋กœ๊ทธ๋žจ์ด ์—†์Šต๋‹ˆ๋‹ค."
151
+ youtube_recommendation = "์ถ”์ฒœํ•  ์œ ํŠœ๋ธŒ ์ฝ˜ํ…์ธ ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."
152
+ recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
153
+ "์ ํ•ฉํ•œ ํ”„๋กœ๊ทธ๋žจ ์—†์Œ", "์ถ”์ฒœ ์ฝ˜ํ…์ธ  ์—†์Œ"])
154
 
155
+ recommendations.append(recommendation + "\n" + youtube_recommendation)
156
 
157
  global_recommendations = recommendation_rows
158
 
 
175
  global_csv_string = create_csv_string(recommendation_rows)
176
 
177
  # ๊ฒฐ๊ณผ ํ…Œ์ด๋ธ” ๋ฐ์ดํ„ฐํ”„๋ ˆ์ž„ ์ƒ์„ฑ
178
+ result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
179
 
180
  return result_df, chart_buffer, gr.File.update(visible=True)
181
 
 
187
 
188
  for employee in global_recommendations:
189
  if employee[1].lower() in message.lower():
190
+ return f"{employee[1]}๋‹˜์—๊ฒŒ ์ถ”์ฒœ๋œ ํ”„๋กœ๊ทธ๋žจ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {employee[2]}\n\n์ถ”์ฒœ ์œ ํŠœ๋ธŒ ์ฝ˜ํ…์ธ : {employee[3]}"
191
 
192
  return "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ํ•ด๋‹น ์ง์›์˜ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ๋‹ค๋ฅธ ์ง์› ์ด๋ฆ„์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
193
 
 
200
 
201
  # Gradio 블록
202
  with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
203
+ gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>๐Ÿ’ผ HybridRAG ์‹œ์Šคํ…œ (์œ ํŠœ๋ธŒ ์ฝ˜ํ…์ธ  ํฌํ•จ)</h1>")
204
 
205
  with gr.Row():
206
  with gr.Column(scale=1, min_width=300):
207
+ gr.Markdown("<h3 style='color: #34495e;'>1. ๋ฐ์ดํ„ฐ๋ฅผ ์—…๋กœ๋“œํ•˜์„ธ์š”</h3>")
208
  employee_file = gr.File(label="์ง์› ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ", interactive=True)
209
  program_file = gr.File(label="๊ต์œก ํ”„๋กœ๊ทธ๋žจ ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ", interactive=True)
210
+ youtube_file = gr.File(label="์œ ํŠœ๋ธŒ ์ฝ˜ํ…์ธ  ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ", interactive=True)
211
+
212
+ gr.Markdown("<h4 style='color: #34495e;'>์œ ํŠœ๋ธŒ ๋ฐ์ดํ„ฐ ์—ด ์„ ํƒ</h4>")
213
+ title_col = gr.Dropdown(label="์ œ๋ชฉ ์—ด")
214
+ description_col = gr.Dropdown(label="์„ค๋ช… ์—ด")
215
+ url_col = gr.Dropdown(label="URL ์—ด")
216
+ upload_date_col = gr.Dropdown(label="์—…๋กœ๋“œ ๋‚ ์งœ ์—ด")
217
+
218
+ youtube_file.change(select_youtube_columns, inputs=[youtube_file], outputs=[title_col, description_col, url_col, upload_date_col])
219
+
220
  analyze_button = gr.Button("๋ถ„์„ ์‹œ์ž‘", elem_classes="gradio-button")
221
  output_table = gr.DataFrame(label="๋ถ„์„ ๊ฒฐ๊ณผ (ํ…Œ์ด๋ธ”)")
222
  csv_download = gr.File(label="์ถ”์ฒœ ๊ฒฐ๊ณผ ๋‹ค์šด๋กœ๋“œ", visible=False)
 
225
  gr.Markdown("<h3 style='color: #34495e;'>2. ๋ถ„์„ ๊ฒฐ๊ณผ ๋ฐ ์‹œ๊ฐํ™”</h3>")
226
  chart_output = gr.Image(label="์‹œ๊ฐํ™” ์ฐจํŠธ")
227
 
228
+ gr.Markdown("<h3 style='color: #34495e;'>3. ์ง์›๋ณ„ ์ถ”์ฒœ ํ”„๋กœ๊ทธ๋žจ ๋ฐ ์œ ํŠœ๋ธŒ ์ฝ˜ํ…์ธ  ํ™•์ธ</h3>")
229
  chatbot = gr.Chatbot()
230
  msg = gr.Textbox(label="์ง์› ์ด๋ฆ„์„ ์ž…๋ ฅํ•˜์„ธ์š”")
231
  clear = gr.Button("๋Œ€ํ™” ๋‚ด์—ญ ์ง€์šฐ๊ธฐ")
232
 
233
  # 분석 버튼 클릭 시 테이블, 차트, 파일 다운로드를 업데이트
234
+ analyze_button.click(hybrid_rag,
235
+ inputs=[employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col],
236
+ outputs=[output_table, chart_output, csv_download])
237
 
238
  # CSV 다운로드 버튼
239
  csv_download.click(download_csv, inputs=[], outputs=[csv_download])