judebebo32 committed • Commit 69f5007 • 1 Parent(s): b31d310
Update app.py
app.py CHANGED
@@ -1,3 +1,4 @@
+import os
 import nltk
 from nltk.corpus import stopwords
 from nltk.stem import PorterStemmer
@@ -6,15 +7,26 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 import gradio as gr
 
-#
+# Ensure necessary NLTK downloads
 nltk.download('punkt')
 nltk.download('stopwords')
 
-#
-
+# Path to the dataset file
+file_path = 'my_text_file.txt'
+
+# Check if the file exists
+if not os.path.exists(file_path):
+    raise FileNotFoundError(f"{file_path} not found in the environment.")
+
+# Load the dataset
+with open(file_path, 'r') as f:
     data = f.readlines()
 
-#
+# Ensure the data is loaded correctly
+if not data:
+    raise ValueError("The dataset is empty or could not be loaded properly.")
+
+# Preprocessing function for text
 def preprocess_text(text):
     tokens = word_tokenize(text.lower())
     stop_words = set(stopwords.words('english'))
@@ -23,27 +35,29 @@ def preprocess_text(text):
     stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
     return stemmed_tokens
 
-# Create TF-IDF vectorizer
+# Create a TF-IDF vectorizer
 vectorizer = TfidfVectorizer(analyzer=preprocess_text)
 tfidf_matrix = vectorizer.fit_transform(data)
 
-#
+# Chatbot response function
 def chatbot_response(user_input):
     input_vector = vectorizer.transform([user_input])
     cosine_similarities = cosine_similarity(input_vector, tfidf_matrix)
     most_similar_index = cosine_similarities.argmax()
     return data[most_similar_index].strip()
 
-#
+# Gradio interface
 def chatbot_interface(user_input):
     response = chatbot_response(user_input)
     return response
 
+# Create a Gradio interface for the chatbot
 iface = gr.Interface(fn=chatbot_interface,
                      inputs="text",
                      outputs="text",
                      title="FAQ Chatbot",
-                     description="
+                     description="Ask a question to the FAQ chatbot.")
 
+# Launch the Gradio app
 if __name__ == "__main__":
     iface.launch()
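
For context, a minimal standalone sketch of the TF-IDF plus cosine-similarity lookup that chatbot_response performs, using a small hypothetical in-memory FAQ list in place of my_text_file.txt and skipping the NLTK preprocessing; the FAQ lines and query below are illustrative only, not taken from the Space:

# Standalone sketch (not the Space's app.py): same retrieval idea as chatbot_response,
# with a hypothetical in-memory FAQ list standing in for my_text_file.txt.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

faq_lines = [
    "You can reset your password from the account settings page.",  # illustrative entry
    "Refunds are usually processed within 5 business days.",        # illustrative entry
]

vectorizer = TfidfVectorizer()                    # app.py passes analyzer=preprocess_text here
faq_matrix = vectorizer.fit_transform(faq_lines)  # one TF-IDF row per FAQ line

query_vector = vectorizer.transform(["How do I reset my password?"])
best_index = cosine_similarity(query_vector, faq_matrix).argmax()  # index of the closest FAQ line
print(faq_lines[best_index].strip())

The committed app.py differs only in that it tokenizes, removes stop words, and stems with NLTK before vectorizing, and serves the lookup through a Gradio text interface.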