Spaces:

skolvankar
/

meraspace

Runtime error

App Files Files Community

meraspace / 1app.py

skolvankar

Add application file

8508bc5 about 1 year ago

raw

history blame

3.58 kB

	# -*- coding: utf-8 -*-
	"""Skill Transformation Journey.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
	"""

	import gradio as gr
	import re
	import openai
	from openai import OpenAI
	import pandas as pd
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import linear_kernel

	# OpenAI client. The API key is deliberately NOT written in source: with no
	# api_key argument the client reads the OPENAI_API_KEY environment variable
	# (configure it as a secret in the hosting environment). Construction fails
	# with an OpenAI error if that variable is not set.
	# NOTE(review): a key was previously hard-coded here and is exposed in the
	# repository history -- it must be revoked and rotated.
	client = OpenAI()

	# Location of the course catalogue. Despite the variable name, the file is
	# a CSV and is loaded with pandas.read_csv below.
	excel_file_path = "1.csv"
	# Not consumed by read_csv below; kept as a module-level name.
	sheet_name = "Shortlisted Courses"

	# Load the catalogue into a DataFrame.
	courses_df = pd.read_csv(excel_file_path)

	# Fit a TF-IDF model (English stop words removed) on the course names.
	# Missing names are replaced with empty strings before vectorizing.
	_course_names = courses_df['Course Name'].fillna('')
	tfidf_vectorizer = TfidfVectorizer(stop_words='english')
	tfidf_matrix = tfidf_vectorizer.fit_transform(_course_names)

	def html_coversion(gpt_content):
	    """Render GPT-formatted course recommendations as an HTML table.

	    ``gpt_content`` is expected to contain numbered entries shaped like::

	        1. <course name>:
	           - Course Link: [<link text>](<url>)
	           - Description: <one line of text>

	    Args:
	        gpt_content: Text to parse. ``None`` is treated as empty text.

	    Returns:
	        An HTML ``<table>`` string with "Course Name" and "Course Link"
	        columns, one row per parsed entry. When nothing matches, the table
	        contains only the header row.
	    """
	    import html

	    data_text = gpt_content or ""

	    # Groups: number, course name, link text, link URL, description.
	    pattern = (
	        r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)'
	        r'\n\s*- Description: ([^\n]+)'
	    )
	    courses = re.findall(pattern, data_text)

	    rows = []
	    for _number, name, _link_text, url, _description in courses:
	        url = url.strip()
	        safe_url = html.escape(url, quote=True)
	        # The text comes from a language model, so it is escaped here and
	        # only http(s) URLs are turned into clickable anchors.
	        if url.lower().startswith(('http://', 'https://')):
	            link_cell = (
	                f'<a href="{safe_url}" target="_blank" '
	                f'rel="noopener noreferrer">{safe_url}</a>'
	            )
	        else:
	            link_cell = safe_url
	        rows.append((html.escape(name.strip()), link_cell))

	    df = pd.DataFrame(rows, columns=['Course Name', 'Course Link'])

	    # escape=False keeps the anchors built above as markup (cell text was
	    # escaped manually). The column-width limit is lifted so long anchors
	    # are not cut off in the rendered table.
	    with pd.option_context('display.max_colwidth', None):
	        return df.to_html(index=False, escape=False)

	# Recommend courses for a free-text skill using TF-IDF similarity.
	def recommend_courses(user_skill):
	    """Return an HTML table of the courses most similar to ``user_skill``.

	    The input is vectorized with the module-level ``tfidf_vectorizer`` and
	    scored against ``tfidf_matrix`` (built from the course names) with
	    ``linear_kernel``. Up to five courses with a non-zero score are shown,
	    best match first.

	    Args:
	        user_skill: Skill text entered by the user. ``None`` or blank input
	            produces a short prompt instead of a table.

	    Returns:
	        An HTML string: a table with "Course Name" and "Course Link"
	        columns, or a one-line message when there is nothing to show.
	    """
	    import html

	    query = str(user_skill or "").strip()
	    if not query:
	        return "<p>Enter a skill to see course recommendations.</p>"

	    # Score the query against every course name.
	    query_vector = tfidf_vectorizer.transform([query])
	    scores = linear_kernel(query_vector, tfidf_matrix)[0]

	    ranked = courses_df[['Course Name', 'Course Link']].copy()
	    ranked['Similarity'] = scores
	    # A zero score means no shared terms; such rows are not recommendations.
	    ranked = ranked[ranked['Similarity'] > 0]
	    top = ranked.sort_values(by='Similarity', ascending=False).head(5)
	    if top.empty:
	        return "<p>No matching courses found.</p>"

	    # The rows are rendered directly. Dumping them with DataFrame.to_string
	    # and feeding that to html_coversion never matched the numbered-list
	    # format that parser expects, so the table always came out empty.
	    names = []
	    links = []
	    for name, link in zip(top['Course Name'].fillna(''), top['Course Link'].fillna('')):
	        link = str(link).strip()
	        safe_link = html.escape(link, quote=True)
	        if link.lower().startswith(('http://', 'https://')):
	            safe_link = (
	                f'<a href="{safe_link}" target="_blank" '
	                f'rel="noopener noreferrer">{safe_link}</a>'
	            )
	        names.append(html.escape(str(name)))
	        links.append(safe_link)

	    table = pd.DataFrame({'Course Name': names, 'Course Link': links})

	    # escape=False keeps the anchors as markup (cell text was escaped above).
	    # The column-width limit is lifted so long anchors are not cut off.
	    with pd.option_context('display.max_colwidth', None):
	        return table.to_html(index=False, escape=False)

	# Gradio interface: one free-text skill box whose value is passed to
	# recommend_courses; the HTML string it returns is rendered as the output.
	iface = gr.Interface(
	    fn=recommend_courses,
	    inputs=[
	        # No positional argument: the first positional parameter of Textbox
	        # is its initial value, so passing "text" pre-filled the box and,
	        # with live=True, ran a search for the word "text".
	        gr.Textbox(label="Enter expected skill"),
	    ],
	    outputs="html",
	    # Re-run the function as the input changes, without a submit button.
	    live=True,
	)

	# Start the app. share=True asks Gradio to also create a public share link.
	iface.launch(share=True)