# Spaces status: Runtime error
# -*- coding: utf-8 -*-
"""Skill Transformation Journey.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
"""
import gradio as gr | |
import re | |
import openai | |
from openai import OpenAI | |
import pandas as pd | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics.pairwise import linear_kernel | |
# OpenAI client. The API key is deliberately NOT written in the source:
# OpenAI() falls back to the OPENAI_API_KEY environment variable, so the key
# must be supplied through the environment (e.g. a deployment secret).
# SECURITY: any key that was ever committed alongside this file is public
# and should be revoked.
client = OpenAI()
# Course catalogue location. Despite the variable name this is a CSV file;
# it is loaded with pd.read_csv below, and sheet_name is not passed to it.
excel_file_path = "1.csv"
sheet_name = "Shortlisted Courses"  # Replace with the actual sheet name

# Load the catalogue into a DataFrame.
courses_df = pd.read_csv(excel_file_path)

# Fit a TF-IDF model (English stop words removed) on the course names.
# Missing names are replaced by empty strings so they can be vectorised.
_course_names = courses_df['Course Name'].fillna('')
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(_course_names)
def html_coversion(gpt_content):
    """Convert a numbered, markdown-style course list into an HTML table.

    Each course entry in the text is expected to look like::

        1. <course name>:
           - Course Link: [<link text>](<url>)
           - Description: <single line>

    Args:
        gpt_content: Text to parse. ``None``, an empty string, or text with
            no matching entries produces a table that has headers only.

    Returns:
        str: ``<table>`` markup with the columns ``Course Name`` and
        ``Course Link`` (the URL of the markdown link), one row per entry.
    """
    data_text = gpt_content or ""

    # Each match is (number, name, link text, url, description).
    courses = re.findall(
        r'(\d+)\. (.*?):\n'
        r'\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n'
        r'\s*- Description: ([^\n]+)',
        data_text,
    )

    # Keep exactly the two values the table shows. Unpacking by position
    # works whether or not the link text equals the URL, and always yields
    # rows that match the two column names below.
    rows = [
        (name.strip(), url.strip())
        for _number, name, _link_text, url, _description in courses
    ]
    df = pd.DataFrame(rows, columns=['Course Name', 'Course Link'])

    # The text is model-generated, so cell contents are HTML-escaped (the
    # pandas default) instead of being injected into the page verbatim.
    return df.to_html(index=False)
# Recommend courses for a skill using TF-IDF similarity on the course names
def recommend_courses(user_skill):
    """Return an HTML table of the catalogue courses that best match a skill.

    The skill text is vectorised with the module-level ``tfidf_vectorizer``
    and scored against ``tfidf_matrix`` (one row per course in
    ``courses_df``). The five highest-scoring courses are rendered.

    Args:
        user_skill: Free-text skill entered by the user. ``None`` is treated
            as an empty query.

    Returns:
        str: ``<table>`` markup with the columns ``Course Name`` and
        ``Course Link``. The table has headers only when no course shares a
        term with the query (including a blank query).
    """
    user_input = "" if user_skill is None else f"{user_skill}"

    # Score every course against the query.
    user_vector = tfidf_vectorizer.transform([user_input])
    cosine_similarities = linear_kernel(user_vector, tfidf_matrix)

    # Work on a copy so the shared catalogue never gains a 'Similarity' column.
    recommendations = courses_df.copy()
    recommendations['Similarity'] = cosine_similarities[0]

    # A score of 0 means the course shares no term with the query; without
    # this filter a blank or unknown skill would list five arbitrary courses.
    matched = recommendations[recommendations['Similarity'] > 0]
    top_recommendations = matched.sort_values(by='Similarity', ascending=False).head(5)

    # Render the rows directly. Round-tripping them through to_string() and a
    # markdown-list parser can never match and always yields an empty table.
    # NOTE(review): escape=False mirrors the existing rendering choice and
    # assumes the catalogue CSV is trusted content - confirm.
    return top_recommendations[['Course Name', 'Course Link']].to_html(index=False, escape=False)
# Gradio interface: a single free-text skill box whose input is passed to
# recommend_courses; the returned markup is shown in an HTML output.
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        # The first positional argument of gr.Textbox is its initial value,
        # so only the label is passed here; otherwise the box starts out
        # pre-filled with a literal word that gets used as the query.
        gr.Textbox(label="Enter expected skill"),
    ],
    outputs="html",
    live=True,  # re-run the recommendation as the input changes
)

# Launch the Gradio interface, requesting a public share link.
iface.launch(share=True)