# Spaces: Sleeping
# (Hosting-page status text captured with the file; not part of the program.)
import os
from functools import lru_cache

import gradio as gr
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Ensure necessary NLTK downloads.
# 'punkt_tab' is requested alongside 'punkt' because recent NLTK releases
# load the word_tokenize models from that resource; older releases use 'punkt'.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Path to the dataset file
file_path = 'my_text_file.txt'

# Load the dataset, one entry per non-blank line.
# - The encoding is explicit so decoding does not depend on the platform default.
# - Blank lines are skipped so they are never indexed (or returned) as answers.
# - Opening directly and translating the error avoids a check-then-open race
#   while keeping the original exception type and message.
try:
    with open(file_path, 'r', encoding='utf-8') as f:
        data = [line for line in f if line.strip()]
except FileNotFoundError as err:
    raise FileNotFoundError(f"{file_path} not found in the environment.") from err

# Ensure the data is loaded correctly (also catches whitespace-only files)
if not data:
    raise ValueError("The dataset is empty or could not be loaded properly.")
# Preprocessing function for text

# A single stemmer instance is reused across calls instead of being rebuilt
# for every document and every query.
_STEMMER = PorterStemmer()


@lru_cache(maxsize=1)
def _english_stop_words():
    """Return the English stop-word set, loading the corpus only once."""
    return frozenset(stopwords.words('english'))


def preprocess_text(text):
    """Turn raw text into a list of stemmed, stop-word-free tokens.

    The text is lower-cased, tokenized with NLTK's ``word_tokenize``, filtered
    against the English stop-word list, and each remaining token is stemmed
    with the Porter stemmer.

    Args:
        text: The string to preprocess.

    Returns:
        A list of stemmed tokens, in their original order.
    """
    stop_words = _english_stop_words()
    return [
        _STEMMER.stem(token)
        for token in word_tokenize(text.lower())
        if token not in stop_words
    ]
# Build the TF-IDF model: preprocess_text is used as the analyzer, so each
# dataset entry is tokenized, stop-word filtered and stemmed before weighting.
vectorizer = TfidfVectorizer(
    analyzer=preprocess_text,
)

# Learn the vocabulary from the dataset and encode every entry in one step.
tfidf_matrix = vectorizer.fit_transform(data)
# Chatbot response function

# Returned when the question shares no terms with any dataset entry.
_NO_MATCH_MESSAGE = "Sorry, I couldn't find an answer to that question."


def chatbot_response(user_input):
    """Return the dataset entry most similar to ``user_input``.

    The input is vectorized with the fitted TF-IDF vectorizer and compared to
    every dataset entry by cosine similarity; the best-scoring entry is
    returned with surrounding whitespace stripped.

    Args:
        user_input: The user's question as a string.

    Returns:
        The best-matching dataset line, or a fallback message when the input
        is empty or has no term in common with any entry.
    """
    # Empty or whitespace-only input cannot match anything.
    if not user_input or not user_input.strip():
        return _NO_MATCH_MESSAGE

    input_vector = vectorizer.transform([user_input])
    # One query row against all entries -> flatten to a 1-D score vector.
    similarities = cosine_similarity(input_vector, tfidf_matrix).ravel()
    best_index = int(similarities.argmax())

    # With all-zero scores argmax is 0, which would otherwise return the
    # first dataset entry as if it were a real match.
    if similarities[best_index] <= 0.0:
        return _NO_MATCH_MESSAGE

    return data[best_index].strip()
# Gradio interface
def chatbot_interface(user_input):
    """Gradio callback: pass the user's text to the chatbot and return its reply."""
    return chatbot_response(user_input)
# Create a Gradio interface for the chatbot: one text box in, one text box out.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="FAQ Chatbot",
    description="Ask a question to the FAQ chatbot.",
)

# Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()