Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -49,7 +49,7 @@ def auto_match_columns(df, required_cols):
|
|
49 |
for req_col in required_cols:
|
50 |
matched_col = None
|
51 |
for col in df.columns:
|
52 |
-
if req_col in col.lower():
|
53 |
matched_col = col
|
54 |
break
|
55 |
matched_cols[req_col] = matched_col
|
@@ -81,21 +81,32 @@ def select_youtube_columns(youtube_file):
|
|
81 |
youtube_columns = auto_match_columns(youtube_df, required_youtube_cols)
|
82 |
|
83 |
column_options = {col: youtube_df.columns.tolist() for col in required_youtube_cols}
|
84 |
-
return gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("
|
85 |
-
gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("description")), \
|
86 |
-
gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("url")), \
|
87 |
-
gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("upload_date"))
|
88 |
|
89 |
# 유튜브 콘텐츠 데이터 로드 및 처리 함수 (load and process the YouTube content data)
|
90 |
def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
|
91 |
youtube_df = pd.read_csv(file_path)
|
92 |
-
|
93 |
-
youtube_df
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
return youtube_df
|
96 |
|
97 |
# 유튜브 콘텐츠와 교육 프로그램 매칭 함수 (match YouTube content to training programs)
|
98 |
def match_youtube_content(program_skills, youtube_df, model):
|
|
|
|
|
99 |
youtube_embeddings = model.encode(youtube_df['description'].tolist())
|
100 |
program_embeddings = model.encode(program_skills)
|
101 |
similarities = cosine_similarity(program_embeddings, youtube_embeddings)
|
@@ -137,13 +148,15 @@ def hybrid_rag(employee_file, program_file, youtube_file, title_col, description
|
|
137 |
recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
|
138 |
|
139 |
# 해당 프로그램과 가장 유사한 유튜브 콘텐츠 찾기 (find the YouTube content most similar to this program)
|
140 |
-
|
141 |
-
|
142 |
-
|
|
|
|
|
143 |
|
144 |
if recommended_programs:
|
145 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์ ์ถ์ฒ ํ๋ก๊ทธ๋จ: {', '.join(recommended_programs)}"
|
146 |
-
youtube_recommendation = f"์ถ์ฒ ์ ํ๋ธ ์ฝํ
์ธ : {', '.join(recommended_youtube)}"
|
147 |
recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
|
148 |
", ".join(recommended_programs), ", ".join(recommended_youtube)])
|
149 |
else:
|
|
|
49 |
for req_col in required_cols:
|
50 |
matched_col = None
|
51 |
for col in df.columns:
|
52 |
+
if req_col.lower() in col.lower():
|
53 |
matched_col = col
|
54 |
break
|
55 |
matched_cols[req_col] = matched_col
|
|
|
81 |
youtube_columns = auto_match_columns(youtube_df, required_youtube_cols)
|
82 |
|
83 |
column_options = {col: youtube_df.columns.tolist() for col in required_youtube_cols}
|
84 |
+
return [gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get(col, "")) for col in required_youtube_cols]
|
|
|
|
|
|
|
85 |
|
86 |
# 유튜브 콘텐츠 데이터 로드 및 처리 함수 (load and process the YouTube content data)
|
87 |
def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
|
88 |
youtube_df = pd.read_csv(file_path)
|
89 |
+
selected_columns = [col for col in [title_col, description_col, url_col, upload_date_col] if col]
|
90 |
+
youtube_df = youtube_df[selected_columns]
|
91 |
+
|
92 |
+
# 선택된 열 이름을 필요한 열 이름으로 매핑 (map the selected column names to the required column names)
|
93 |
+
column_mapping = {
|
94 |
+
title_col: 'title',
|
95 |
+
description_col: 'description',
|
96 |
+
url_col: 'url',
|
97 |
+
upload_date_col: 'upload_date'
|
98 |
+
}
|
99 |
+
youtube_df.rename(columns=column_mapping, inplace=True)
|
100 |
+
|
101 |
+
if 'upload_date' in youtube_df.columns:
|
102 |
+
youtube_df['upload_date'] = pd.to_datetime(youtube_df['upload_date'], errors='coerce')
|
103 |
+
|
104 |
return youtube_df
|
105 |
|
106 |
# 유튜브 콘텐츠와 교육 프로그램 매칭 함수 (match YouTube content to training programs)
|
107 |
def match_youtube_content(program_skills, youtube_df, model):
|
108 |
+
if 'description' not in youtube_df.columns:
|
109 |
+
return None
|
110 |
youtube_embeddings = model.encode(youtube_df['description'].tolist())
|
111 |
program_embeddings = model.encode(program_skills)
|
112 |
similarities = cosine_similarity(program_embeddings, youtube_embeddings)
|
|
|
148 |
recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
|
149 |
|
150 |
# 해당 프로그램과 가장 유사한 유튜브 콘텐츠 찾기 (find the YouTube content most similar to this program)
|
151 |
+
if youtube_similarities is not None:
|
152 |
+
top_youtube_indices = youtube_similarities[j].argsort()[-3:][::-1] # ์์ 3๊ฐ
|
153 |
+
for idx in top_youtube_indices:
|
154 |
+
if 'title' in youtube_df.columns and 'url' in youtube_df.columns:
|
155 |
+
recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
|
156 |
|
157 |
if recommended_programs:
|
158 |
recommendation = f"์ง์ {employee[employee_cols['employee_name']]}์ ์ถ์ฒ ํ๋ก๊ทธ๋จ: {', '.join(recommended_programs)}"
|
159 |
+
youtube_recommendation = f"์ถ์ฒ ์ ํ๋ธ ์ฝํ
์ธ : {', '.join(recommended_youtube)}" if recommended_youtube else "์ถ์ฒํ ์ ํ๋ธ ์ฝํ
์ธ ๊ฐ ์์ต๋๋ค."
|
160 |
recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
|
161 |
", ".join(recommended_programs), ", ".join(recommended_youtube)])
|
162 |
else:
|