"""FAQ chatbot served through a Gradio text interface.

Each non-blank line of ``my_text_file.txt`` is treated as one FAQ entry.
Entries and user questions are lower-cased, tokenized, stripped of English
stop words and Porter-stemmed, then compared by cosine similarity of their
TF-IDF vectors. The entry most similar to the question is returned.
"""

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr

# Download necessary NLTK data.
nltk.download('punkt')
# NOTE(review): recent NLTK releases load the tokenizer models from
# 'punkt_tab' instead of 'punkt'; requesting both keeps word_tokenize usable
# on either. Confirm against the NLTK version pinned for this project.
nltk.download('punkt_tab')
nltk.download('stopwords')

FAQ_PATH = 'my_text_file.txt'

# Returned when the question is empty or shares no terms with any FAQ entry.
NO_MATCH_RESPONSE = (
    "Sorry, I couldn't find an answer to that. "
    "Please try rephrasing your question."
)

# Built once at import time: preprocess_text runs for every FAQ entry and
# every incoming question, so these must not be re-created per call.
_STOP_WORDS = frozenset(stopwords.words('english'))
_STEMMER = PorterStemmer()

# Load customer inquiries dataset. Blank lines are dropped so they are not
# indexed as (empty) documents and can never be returned as an answer.
with open(FAQ_PATH, 'r', encoding='utf-8') as f:
    data = [line.strip() for line in f if line.strip()]

if not data:
    raise ValueError(f"No FAQ entries found in {FAQ_PATH!r}")


def preprocess_text(text: str) -> list:
    """Return the stemmed, stop-word-free tokens of *text*.

    The text is lower-cased and tokenized with NLTK's ``word_tokenize``;
    English stop words are removed and the remaining tokens are reduced to
    their Porter stems.
    """
    tokens = word_tokenize(text.lower())
    return [_STEMMER.stem(word) for word in tokens if word not in _STOP_WORDS]


# Create TF-IDF vectorizer. Passing a callable analyzer makes scikit-learn
# hand each raw document to preprocess_text instead of its own tokenizer.
vectorizer = TfidfVectorizer(analyzer=preprocess_text)
tfidf_matrix = vectorizer.fit_transform(data)


def chatbot_response(user_input: str) -> str:
    """Return the FAQ entry most similar to *user_input*.

    Returns ``NO_MATCH_RESPONSE`` when the input is empty or whitespace, or
    when it has zero similarity to every entry.
    """
    if not user_input or not user_input.strip():
        return NO_MATCH_RESPONSE

    input_vector = vectorizer.transform([user_input])
    # cosine_similarity yields a (1, n_entries) array; take its single row.
    scores = cosine_similarity(input_vector, tfidf_matrix)[0]
    best_index = int(scores.argmax())

    # With all-zero scores argmax would pick index 0, i.e. an arbitrary
    # entry that has nothing in common with the question.
    if scores[best_index] <= 0:
        return NO_MATCH_RESPONSE
    return data[best_index].strip()


def chatbot_interface(user_input: str) -> str:
    """Gradio callback: answer *user_input* via ``chatbot_response``."""
    return chatbot_response(user_input)


# Create Gradio interface.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="FAQ Chatbot",
    description="Enter a question to get a response from the chatbot based on the preloaded FAQ data.",
)

if __name__ == "__main__":
    iface.launch()