import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr

# Download necessary NLTK data
nltk.download('punkt')
nltk.download('stopwords')
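# Note: on newer NLTK releases word_tokenize may also need the 'punkt_tab' resource;
# if tokenization raises a LookupError, nltk.download('punkt_tab') should resolve it.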

# Load customer inquiries dataset
with open('my_text_file.txt', 'r') as f:
    data = f.readlines()
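# Assumption: each line of my_text_file.txt holds one self-contained FAQ entry
# (e.g. a question/answer pair on a single line); the bot returns the closest line verbatim.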

# Preprocess data
def preprocess_text(text):
    # Lowercase and tokenize, drop English stopwords, then stem each remaining token
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word not in stop_words]
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
    return stemmed_tokens
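# Illustrative example (not from the FAQ file): preprocess_text("How do I reset my password?")
# yields stemmed, stopword-free tokens such as ['reset', 'password', '?'].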

# Create TF-IDF vectorizer
vectorizer = TfidfVectorizer(analyzer=preprocess_text)
tfidf_matrix = vectorizer.fit_transform(data)
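# Passing the function as analyzer makes scikit-learn use preprocess_text for tokenization,
# so user queries and FAQ lines are vectorized consistently.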

# Define chatbot logic
def chatbot_response(user_input):
    input_vector = vectorizer.transform([user_input])
    cosine_similarities = cosine_similarity(input_vector, tfidf_matrix)
    most_similar_index = cosine_similarities.argmax()
    return data[most_similar_index].strip()
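# Illustrative check (assumes the FAQ file contains a related entry):
#   print(chatbot_response("How can I reset my password?"))
# Note that argmax always picks some line; a minimum-similarity threshold with a
# fallback reply could be added if "no good match" handling is needed.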

# Create Gradio interface
def chatbot_interface(user_input):
    response = chatbot_response(user_input)
    return response

iface = gr.Interface(fn=chatbot_interface,
                     inputs="text",
                     outputs="text",
                     title="FAQ Chatbot",
                     description="Enter a question to get a response from the chatbot based on the preloaded FAQ data.")

if __name__ == "__main__":
    iface.launch()
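    # If a temporary public demo link is wanted, Gradio also supports iface.launch(share=True).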