Spaces:
Sleeping
Sleeping
from functools import lru_cache

import nltk
from flask import Flask, request, jsonify
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Initialize the Flask app
app = Flask(__name__)

# Download the NLTK resources used for tokenizing and stop-word filtering.
# Recent NLTK releases load the Punkt tokenizer from the 'punkt_tab' package
# instead of the pickled 'punkt' one, so both are requested to keep
# word_tokenize working regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Load the customer inquiries dataset; each line is treated as one document.
# The encoding is pinned so decoding does not depend on the platform default,
# and blank lines are skipped so an empty string can never be picked as a reply.
with open('my_text_file.txt', 'r', encoding='utf-8') as f:
    data = [line for line in f if line.strip()]
# Preprocess data
@lru_cache(maxsize=1)
def _preprocessing_resources():
    """Return the (stop-word set, stemmer) pair, built once on first use.

    Reading the stop-word corpus and constructing a stemmer on every call is
    wasted work: preprocess_text runs once per document while the vectorizer
    is fitted and once more for every incoming message.
    """
    return frozenset(stopwords.words('english')), PorterStemmer()


def preprocess_text(text):
    """Lowercase, tokenize, drop English stop words, and stem *text*.

    Args:
        text: Raw input string.

    Returns:
        A list of Porter-stemmed tokens, in their original order.
    """
    stop_words, stemmer = _preprocessing_resources()
    return [
        stemmer.stem(token)
        for token in word_tokenize(text.lower())
        if token not in stop_words
    ]
# Build the TF-IDF model over the loaded inquiries. preprocess_text is used
# as the analyzer, so each document is turned into tokens by that function.
vectorizer = TfidfVectorizer(
    analyzer=preprocess_text,
)
tfidf_matrix = vectorizer.fit_transform(data)
# Define chatbot logic
def chatbot_response(user_input):
    """Return the dataset line most similar to *user_input*.

    The message is projected into the fitted TF-IDF space and compared with
    every dataset line by cosine similarity; the best-scoring line is
    returned with surrounding whitespace stripped.

    Args:
        user_input: The raw message text.

    Returns:
        The stripped text of the highest-scoring line in ``data``. When every
        similarity is zero, ``argmax`` yields index 0, so the first line is
        returned.
    """
    # The vectorizer already applies preprocess_text (it is its analyzer), so
    # the raw text is passed in directly; preprocessing it here as well would
    # only repeat the work and discard the result.
    input_vector = vectorizer.transform([user_input])
    cosine_similarities = cosine_similarity(input_vector, tfidf_matrix)
    most_similar_index = cosine_similarities.argmax()
    return data[most_similar_index].strip()
# Define routes
@app.route('/')
def home():
    """Return the plain-text welcome message for the root URL.

    The route decorator is required: without it Flask never registers this
    view and the root URL answers with 404.
    """
    return "Welcome to the Chatbot! Send a POST request to /chat with your message."
@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat message sent as a JSON POST body.

    Expects a JSON object with a non-empty string under the ``message`` key.

    Returns:
        ``{'response': <reply>}`` on success, or
        ``({'error': 'No message provided'}, 400)`` when the body is missing,
        is not a JSON object, or carries no usable ``message`` string.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad request gets the same JSON error reply below.
    payload = request.get_json(silent=True)
    user_input = payload.get('message') if isinstance(payload, dict) else None

    # Only non-empty strings are passed on: the preprocessing step calls
    # text.lower(), which would fail on any other JSON value type.
    if not isinstance(user_input, str) or not user_input:
        return jsonify({'error': 'No message provided'}), 400

    return jsonify({'response': chatbot_response(user_input)})
# Start Flask's built-in server only when this file is executed directly;
# it binds to every network interface on port 8080.
if __name__ == '__main__':
    app.run(port=8080, host='0.0.0.0')