import os
import pickle
from datetime import datetime

import nltk
import pandas as pd
import spacy
import speech_recognition as sr
from autocorrect import Speller
from googletrans import Translator
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import pipeline
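
# Overview: recommendationModel combines translation (googletrans), spell
# correction (autocorrect), multilingual zero-shot sentiment classification
# (mDeBERTa-v3 NLI), and keyword-overlap scoring to recommend articles from
# a pandas DataFrame (self.data1) and log each interaction to a CSV file.
# Text, microphone, and audio-file input are supported.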
class recommendationModel:
    def __init__(self):
        self.translator = Translator()
        # Multilingual zero-shot classifier, so sentiment can be scored
        # before translation to English.
        self.zero_shot_classifier = pipeline(
            'zero-shot-classification',
            model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
        self.spell_checker = Speller(lang='en')
        self.porter = PorterStemmer()
        self.lemmatizer = WordNetLemmatizer()
        self.nlp = spacy.load("en_core_web_sm")
        self.class_names = ["positive :)", "neutral :|", "negative :("]
        self.data1 = None  # article DataFrame; must be set before use
    def detect_language(self, user_input):
        # Translate non-English input to English via googletrans.
        det = self.translator.detect(user_input)
        if det.lang != 'en':
            trans = self.translator.translate(user_input, dest='en')
            print("\nTranslation:", trans.text)
            return trans.text
        else:
            return user_input
    def remove_stopwords(self, tags):
        words = word_tokenize(tags)
        stop_words = set(stopwords.words('english'))
        filtered_words = [word for word in words if word not in stop_words]
        filtered_text = " ".join(filtered_words)
        return filtered_text
    def correct_spelling(self, word):
        return self.spell_checker(word)

    def porterStemmer(self, text):
        words = word_tokenize(text)
        stemmed_words = [self.porter.stem(word) for word in words]
        stemmed_sentence = ' '.join(stemmed_words)
        return stemmed_sentence

    def correct_spellings_in_text(self, text):
        words = nltk.word_tokenize(text)
        corrected_words = [self.correct_spelling(word) for word in words]
        corrected_text = " ".join(corrected_words)
        return corrected_text
    def preprocess_input(self, userInput):
        # Spell-correct, lowercase, and strip stopwords before keyword matching.
        corrected_text = self.correct_spellings_in_text(userInput)
        words = nltk.word_tokenize(corrected_text.lower())
        sentence = " ".join(words)
        sentence = self.remove_stopwords(sentence)
        # sentence = self.porterStemmer(sentence)  # optional stemming, disabled
        keywords = nltk.word_tokenize(sentence.lower())
        return keywords, sentence
    def calculate_score(self, about, keywords):
        # Count how many of the user's keywords appear in the article text.
        score = 0
        for keyword in keywords:
            if keyword in about.lower():
                score += 1
        return score
    def zero_shot_classifier_sent(self, userInput):
        zsc_output = self.zero_shot_classifier(userInput, self.class_names)
        zsc_labels = zsc_output['labels']
        zsc_scores = zsc_output['scores']
        return zsc_labels, zsc_scores
    def recommendArticle(self, userInput, tfidf_scores, output_csv):
        # tfidf_scores is accepted for API compatibility but is not used here;
        # ranking relies on the simple keyword-overlap score below.
        zsc_labels, zsc_scores = self.zero_shot_classifier_sent(userInput)
        label_score_pairs = zip(zsc_labels, zsc_scores)
        max_label, max_score = max(label_score_pairs, key=lambda pair: pair[1])
        userInput = self.detect_language(userInput)  # translate to English
        keywords, sentence = self.preprocess_input(userInput)
        self.data1['score'] = self.data1['description'].apply(
            lambda x: self.calculate_score(x, keywords))
        # Sort articles by keyword-overlap score and show the top ten.
        recommended_articles = self.data1.sort_values(by='score', ascending=False)
        print("\n*****************\nRecommended Articles:")
        for index, row in recommended_articles.head(10).iterrows():
            print(f"\nTitle: {row['title']}")
            print(f"Keywords: {row['keywords']}")
            print(f"Class: {row['class']}")
            print(f"URL: {row['url']}")
        # Append this interaction to the output CSV.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        output_data = {
            'Timestamp': timestamp,
            'User Input': userInput,
            'Emotion': max_label,
            'Sentiment Score': max_score,
            'Keywords': ", ".join(keywords)}
        output_df = pd.DataFrame(output_data, index=[0])
        output_df.to_csv(output_csv, mode='a',
                         header=not os.path.exists(output_csv), index=False)
    def convert_audio_to_text(self, recognizer, source, duration):
        print("Listening for audio...")
        try:
            # listen() is what raises WaitTimeoutError, so it must sit
            # inside the try block for that handler to fire.
            audio_data = recognizer.listen(source, timeout=duration,
                                           phrase_time_limit=duration)
            text = recognizer.recognize_google(audio_data)
            return text
        except sr.WaitTimeoutError:
            print("Listening timed out. No speech detected.")
            return ""
        except sr.UnknownValueError:
            print("Oops, it seems we're having trouble understanding the audio. Let's try again with clearer sound.")
            return ""
        except sr.RequestError as e:
            print(f"Could not request results; {e}")
            return ""
    def extract_keywords_tfidf(self, article_descriptions):
        # Note: this returns TF-IDF scores for the first description only
        # (tfidf_matrix[0]), not for the whole corpus.
        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform(article_descriptions)
        feature_names = tfidf_vectorizer.get_feature_names_out()
        article_tfidf_scores = tfidf_matrix[0].toarray().flatten()
        keyword_scores = dict(zip(feature_names, article_tfidf_scores))
        return keyword_scores
    def main(self, inputs):
        output_csv = "Output2.csv"  # output CSV file for interaction logs
        print("Choose input method:\n1. Text\n2. Voice\n3. Audio File")
        while True:
            choice = input("\nEnter your choice (1 or 2 or 3): ")
            if choice == '1':
                user_input1 = input("Enter your message: ")
                user_input1 = self.detect_language(user_input1)
                inputs.append(user_input1)
                user_input = ' '.join(inputs)
                print(user_input)
                print("\nProcessing....")
                tfidf_scores = self.extract_keywords_tfidf(self.data1['description'])
                self.recommendArticle(user_input, tfidf_scores, output_csv)
                break
            elif choice == '2':
                recognizer = sr.Recognizer()
                with sr.Microphone() as source:
                    recognizer.adjust_for_ambient_noise(source)  # adjust for ambient noise
                    text1 = self.convert_audio_to_text(recognizer, source, 15)
                if text1:
                    text1 = self.detect_language(text1)  # translate before pooling inputs
                    inputs.append(text1)
                    text = ' '.join(inputs)
                    print(text)
                    print("\nProcessing....")
                    tfidf_scores = self.extract_keywords_tfidf(self.data1['description'])
                    self.recommendArticle(text, tfidf_scores, output_csv)
                    break
                else:
                    print("Oops, it seems we're having trouble understanding the audio. Let's try again with clearer sound.")
            elif choice == '3':
                filename = input("Enter the path to the audio file: ")
                recognizer = sr.Recognizer()
                with sr.AudioFile(filename) as source:
                    recognizer.adjust_for_ambient_noise(source)  # adjust for ambient noise
                    text1 = self.convert_audio_to_text(recognizer, source, 1000)
                if text1:
                    text1 = self.detect_language(text1)  # translate before pooling inputs
                    inputs.append(text1)
                    text = ' '.join(inputs)
                    print(text)
                    print("\nProcessing....")
                    tfidf_scores = self.extract_keywords_tfidf(self.data1['description'])
                    self.recommendArticle(text, tfidf_scores, output_csv)
                    break
                else:
                    print("Oops, we couldn't transcribe that audio file. Let's try again with a clearer recording.")
            else:
                print("Invalid choice. Please enter 1 or 2 or 3.")
    # PROPER PICKLING AND UNPICKLING ATTRIBUTES
    def __getstate__(self):
        # Exclude unpicklable / heavyweight attributes from the pickle.
        excluded_attrs = ['translator', 'zero_shot_classifier', 'nlp']  # add others here if needed
        state = self.__dict__.copy()
        for attr in excluded_attrs:
            if attr in state:
                del state[attr]
        return state

    def __setstate__(self, state):
        # Restore the state and recreate the excluded attributes.
        self.__dict__.update(state)
        self.translator = Translator()
        self.zero_shot_classifier = pipeline(
            'zero-shot-classification',
            model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
        self.nlp = spacy.load("en_core_web_sm")

# Build the model and serialize it; __getstate__ keeps the heavyweight
# translator/classifier/spaCy objects out of the pickle.
model = recommendationModel()
with open('model2.pkl', 'wb') as f:
    pickle.dump(model, f)
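
# --- Usage sketch (illustrative, not part of the original file) ---
# Assumptions: 'articles.csv' is a hypothetical path; whatever file is used
# must provide the columns recommendArticle reads ('description', 'title',
# 'keywords', 'class', 'url'). The NLTK downloads cover the resources that
# word_tokenize, stopwords, and WordNetLemmatizer rely on.
if __name__ == "__main__":
    nltk.download('punkt')      # tokenizer models for word_tokenize
    nltk.download('stopwords')  # English stopword list
    nltk.download('wordnet')    # lemmatizer dictionary

    with open('model2.pkl', 'rb') as f:
        loaded = pickle.load(f)  # __setstate__ rebuilds the excluded attributes
    loaded.data1 = pd.read_csv('articles.csv')  # hypothetical article dataset
    loaded.main([])  # start the interactive input loop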