Spaces:
Sleeping
Sleeping
import subprocess
import sys

# Bootstrap the runtime environment before the heavy imports further down.
#
# Every command is launched through the interpreter that is running this
# script (``sys.executable -m ...``) so that packages and the language model
# are installed into *this* environment, not into whichever ``pip`` or
# ``python`` executable happens to come first on PATH.
#
# The calls are deliberately best-effort: return codes are not checked, so a
# failed upgrade does not stop the application from starting.

# Upgrade pip, setuptools, and wheel
subprocess.run([sys.executable, "-m", "pip", "install", "-U", "pip", "setuptools", "wheel"])
# Upgrade spaCy
subprocess.run([sys.executable, "-m", "pip", "install", "-U", "spacy"])
# Download the spaCy language model
subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
from sentence_transformers import SentenceTransformer | |
from sentence_transformers import SentenceTransformer | |
from sklearn.metrics.pairwise import cosine_similarity | |
import numpy as np | |
import pandas as pd | |
from transformers import pipeline | |
import pickle | |
# Load the pickled DataFrame that backs the search.
# NOTE(review): pickle can execute arbitrary code while loading, so this file
# must only ever come from a trusted source.
with open('df_classifcation.pkl', 'rb') as pickle_file:
    df = pickle.load(pickle_file)

# Sentence encoder used to embed incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

# spaCy is imported at this point, after the start-up commands above that
# upgrade it and download the "en_core_web_sm" model.
import spacy

nlp = spacy.load("en_core_web_sm")

# Distinct values of the 'locality' column, used to recognise city names.
cities = df['locality'].unique()
def extract_city(query, cities):
    """Return the city mentioned in *query*, or ``None`` if there is none.

    The query is run through the module-level spaCy pipeline ``nlp`` and its
    named entities labelled ``GPE`` (geo-political entity) are inspected in
    order.

    Args:
        query: Free-text user query.
        cities: Iterable of known city names.

    Returns:
        The first GPE entity that matches a known city, compared
        case-insensitively and returned with the spelling used in *cities*.
        If no entity matches a known city, the text of the first GPE entity
        is returned so callers can still see what was detected. ``None`` is
        returned when the query contains no GPE entity at all.
    """
    # Map a case-folded name to its canonical spelling so "paris" finds "Paris".
    known_cities = {str(name).casefold(): name for name in cities}

    first_gpe = None
    for ent in nlp(query).ents:
        if ent.label_ != "GPE":  # only geo-political entities can be cities
            continue
        match = known_cities.get(ent.text.casefold())
        if match is not None:
            print(f"City found: {match}")
            return match
        if first_gpe is None:
            first_gpe = ent.text

    if first_gpe is None:
        print("No city found.")
    return first_gpe
def filter_and_rank_by_similarity_sentiment_ranking(query, df, model, cities, k):
    """Return the best-matching hotels for *query*.

    Steps:
      1. Try to detect a city in the query. If it occurs in ``df['locality']``,
         restrict the search to that city's rows.
      2. Score every remaining row by cosine similarity between the query
         embedding and the row's stored ``'embedding'``.
      3. Keep, for each ``'hotel_name'``, only its highest-scoring row.
      4. Take the ``k`` highest-scoring hotels and order them by the
         ``'result'`` column, descending.

    Args:
        query: Free-text user query.
        df: DataFrame with at least the columns ``'locality'``,
            ``'hotel_name'``, ``'embedding'`` and ``'result'``. It is not
            modified.
        model: Object exposing ``encode(text)`` that returns the query
            embedding.
        cities: Known city names, forwarded to ``extract_city``.
        k: Maximum number of hotels to return.

    Returns:
        A new DataFrame with at most ``k`` rows and an added ``'similarity'``
        column. It is empty when there are no rows to search.
    """
    city = None
    try:
        city = extract_city(query, cities)
    except Exception as exc:
        # City detection is best-effort: fall back to searching every city,
        # but report the problem instead of hiding it.
        print(f"City extraction failed: {exc}")

    in_city = (df['locality'] == city) if city is not None else None
    if in_city is not None and in_city.any():
        df = df.loc[in_city]
        print(f"City: {city}")
    else:
        print("No city found.")

    if df.empty:
        # Nothing to rank; np.stack would raise on an empty sequence.
        return df.assign(similarity=np.array([], dtype=float))

    query_embedding = model.encode(query)
    embeddings_matrix = np.stack(df['embedding'].values)
    similarities = cosine_similarity([query_embedding], embeddings_matrix).flatten()

    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    scored = df.assign(similarity=similarities)
    best_per_hotel = scored.loc[scored.groupby('hotel_name')['similarity'].idxmax()]
    top_k = best_per_hotel.nlargest(k, 'similarity')
    return top_k.sort_values(by=['result'], ascending=False)
def gr_request(query):
    """Gradio callback: turn a text query into image/text output pairs.

    Ranks hotels for *query* using the module-level ``df``, ``model`` and
    ``cities``, then builds a flat list that alternates a hotel image with a
    text summary of that hotel.

    Args:
        query: Free-text user query from the input text box.

    Returns:
        A list of exactly ``2 * top_k`` items: image, text, image, text, ...
        When fewer than ``top_k`` hotels are found, the remaining slots are
        filled with ``None`` (image) and ``""`` (text) so the number of
        returned values always matches the interface's declared outputs.
    """
    # Must equal the number of (image, text) output pairs of the Interface.
    top_k = 3
    ranked = filter_and_rank_by_similarity_sentiment_ranking(query, df, model, cities, top_k)
    columns = ['hotel_name', 'locality', 'similarity', 'result', 'rate', 'review_text', "hotel_image"]

    output_list = []
    for name, locality, similarity, sentiment, rate, review, image in ranked[columns].values:
        output_list.append(image)
        output_list.append(
            f"Hotel name: {name} \nCity: {locality}\nSimilarity score: {round(similarity,2)} \nSentiment score: {round(sentiment,2)} \nRate: {rate}/5.0 \n\n\nBest Match Review:\n{review}"
        )

    # Pad the slots of hotels that were not found.
    for _ in range(top_k - len(ranked)):
        output_list.extend([None, ""])
    return output_list
# Web UI: a single text box as input and three (image, text) pairs as output,
# filled in order by the values gr_request returns.
demo = gr.Interface(
    fn=gr_request,
    inputs=["text"],
    outputs=["image", "text"] * 3,
)
demo.launch()