Spaces:

soojeongcrystal
/

hybridRAG

Sleeping

App Files Files Community

soojeongcrystal committed on Sep 7, 2024

Commit

1fe42c9

verified ·

1 Parent(s): 0352e69

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -12

app.py CHANGED Viewed

@@ -49,7 +49,7 @@ def auto_match_columns(df, required_cols):
     for req_col in required_cols:
         matched_col = None
         for col in df.columns:
-            if req_col in col.lower():
                 matched_col = col
                 break
         matched_cols[req_col] = matched_col
@@ -81,21 +81,32 @@ def select_youtube_columns(youtube_file):
     youtube_columns = auto_match_columns(youtube_df, required_youtube_cols)
     column_options = {col: youtube_df.columns.tolist() for col in required_youtube_cols}
-    return gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("title")), \
-           gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("description")), \
-           gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("url")), \
-           gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("upload_date"))
 # 유튜브 콘텐츠 데이터 로드 및 처리 함수
 def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
     youtube_df = pd.read_csv(file_path)
-    youtube_df = youtube_df[[title_col, description_col, url_col, upload_date_col]]
-    youtube_df.columns = ['title', 'description', 'url', 'upload_date']
-    youtube_df['upload_date'] = pd.to_datetime(youtube_df['upload_date'])
     return youtube_df
 # 유튜브 콘텐츠와 교육 프로그램 매칭 함수
 def match_youtube_content(program_skills, youtube_df, model):
     youtube_embeddings = model.encode(youtube_df['description'].tolist())
     program_embeddings = model.encode(program_skills)
     similarities = cosine_similarity(program_embeddings, youtube_embeddings)
@@ -137,13 +148,15 @@ def hybrid_rag(employee_file, program_file, youtube_file, title_col, description
                 recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
                 # 해당 프로그램과 가장 유사한 유튜브 콘텐츠 찾기
-                top_youtube_indices = youtube_similarities[j].argsort()[-3:][::-1]  # 상위 3개
-                for idx in top_youtube_indices:
-                    recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
         if recommended_programs:
             recommendation = f"직원 {employee[employee_cols['employee_name']]}의 추천 프로그램: {', '.join(recommended_programs)}"
-            youtube_recommendation = f"추천 유튜브 콘텐츠: {', '.join(recommended_youtube)}"
             recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
                                         ", ".join(recommended_programs), ", ".join(recommended_youtube)])
         else:

     for req_col in required_cols:
         matched_col = None
         for col in df.columns:
+            if req_col.lower() in col.lower():
                 matched_col = col
                 break
         matched_cols[req_col] = matched_col
     youtube_columns = auto_match_columns(youtube_df, required_youtube_cols)
     column_options = {col: youtube_df.columns.tolist() for col in required_youtube_cols}
+    return [gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get(col, "")) for col in required_youtube_cols]
 # 유튜브 콘텐츠 데이터 로드 및 처리 함수
 def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
     youtube_df = pd.read_csv(file_path)
+    selected_columns = [col for col in [title_col, description_col, url_col, upload_date_col] if col]
+    youtube_df = youtube_df[selected_columns]
+    # 선택된 열 이름을 필요한 열 이름으로 매핑
+    column_mapping = {
+        title_col: 'title',
+        description_col: 'description',
+        url_col: 'url',
+        upload_date_col: 'upload_date'
+    }
+    youtube_df.rename(columns=column_mapping, inplace=True)
+    if 'upload_date' in youtube_df.columns:
+        youtube_df['upload_date'] = pd.to_datetime(youtube_df['upload_date'], errors='coerce')
     return youtube_df
 # 유튜브 콘텐츠와 교육 프로그램 매칭 함수
 def match_youtube_content(program_skills, youtube_df, model):
+    if 'description' not in youtube_df.columns:
+        return None
     youtube_embeddings = model.encode(youtube_df['description'].tolist())
     program_embeddings = model.encode(program_skills)
     similarities = cosine_similarity(program_embeddings, youtube_embeddings)
                 recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
                 # 해당 프로그램과 가장 유사한 유튜브 콘텐츠 찾기
+                if youtube_similarities is not None:
+                    top_youtube_indices = youtube_similarities[j].argsort()[-3:][::-1]  # 상위 3개
+                    for idx in top_youtube_indices:
+                        if 'title' in youtube_df.columns and 'url' in youtube_df.columns:
+                            recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
         if recommended_programs:
             recommendation = f"직원 {employee[employee_cols['employee_name']]}의 추천 프로그램: {', '.join(recommended_programs)}"
+            youtube_recommendation = f"추천 유튜브 콘텐츠: {', '.join(recommended_youtube)}" if recommended_youtube else "추천할 유튜브 콘텐츠가 없습니다."
             recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
                                         ", ".join(recommended_programs), ", ".join(recommended_youtube)])
         else: