# FALSHEIKHI's picture
# Create app.py
# a79dc7d verified
# raw
# history blame
# 2.45 kB
import gradio as gr
import spacy
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import spacy
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
from transformers import pipeline
# Review table backing the search.
# NOTE(review): columns read from CSV that hold list-like values come back as
# strings — confirm how the 'embedding' column is stored in df_end.csv.
df = pd.read_csv("df_end.csv")

# No explicit device: SentenceTransformer uses a GPU when one is available and
# falls back to CPU otherwise, so the app also starts on CPU-only hosts
# (a hard-coded .to('cuda') raises there).
model = SentenceTransformer('all-MiniLM-L6-v2')

# Small English spaCy pipeline; its named-entity recogniser is used to spot
# place names in the user's query.
nlp = spacy.load("en_core_web_sm")

# Distinct values of the 'locality' column, used as the list of known cities.
cities = df['locality'].unique()
def extract_city(query, cities):
    """Return the place name mentioned in *query*, or ``None`` if there is none.

    The module-level spaCy pipeline ``nlp`` is run over the query and its
    ``GPE`` (geo-political entity) spans are examined in document order:

    1. A span equal to an entry of ``cities`` is returned as is.
    2. Otherwise a span matching an entry of ``cities`` ignoring case and
       surrounding whitespace is returned using the spelling from ``cities``,
       so an equality filter against the same list succeeds.
    3. If no span matches a known city, the text of the first ``GPE`` span is
       returned unchanged.

    Args:
        query: Free-text user request.
        cities: Iterable of known city names. Non-string entries (for example
            NaN from missing values) are ignored. ``None`` is treated as empty.

    Returns:
        The selected place name, or ``None`` when the query contains no GPE.
    """
    doc = nlp(query)
    gpe_texts = [ent.text for ent in doc.ents if ent.label_ == "GPE"]
    if not gpe_texts:
        return None

    known_names = [
        name for name in (cities if cities is not None else ())
        if isinstance(name, str)
    ]
    exact = set(known_names)
    folded = {}
    for name in known_names:
        # First spelling wins if two entries differ only by case/whitespace.
        folded.setdefault(name.strip().casefold(), name)

    # A query may name a country or region before the city; prefer whichever
    # entity is actually a known city over simply taking the first one.
    for text in gpe_texts:
        if text in exact:
            return text
        match = folded.get(text.strip().casefold())
        if match is not None:
            return match

    return gpe_texts[0]
def _as_vector(value):
    """Coerce one stored embedding into a NumPy vector.

    Strings such as ``"[0.1, 0.2]"`` or ``"[0.1 0.2]"`` are parsed into a float
    array; anything else is passed through ``np.asarray`` unchanged.
    """
    if isinstance(value, str):
        # NOTE(review): assumes a flat, bracketed list of numbers separated by
        # commas and/or whitespace — confirm against the stored data.
        tokens = value.strip().strip("[]").replace(",", " ").split()
        return np.array(tokens, dtype=float)
    return np.asarray(value)


def filter_and_rank_by_similarity_sentiment_ranking(query, df, model, cities, k):
    """Return up to *k* hotels whose reviews best match *query*.

    Steps:
      1. Extract a city from the query; if rows with that ``locality`` exist,
         restrict the search to them, otherwise search every row.
      2. Encode the query with ``model`` and compute its cosine similarity to
         each row's ``embedding``.
      3. Keep, per ``hotel_name``, the single row with the highest similarity.
      4. Take the ``k`` most similar hotels and order them by the ``result``
         column, highest first.

    The input frame is never modified; a new frame carrying an added
    ``similarity`` column is returned.

    Args:
        query: Free-text user request.
        df: DataFrame with at least the columns ``locality``, ``embedding``,
            ``hotel_name`` and ``result``.
        model: Object exposing ``encode(text)`` that returns the query vector.
        cities: Known city names, forwarded to ``extract_city``. The city
            filter itself is decided from ``df['locality']``.
        k: Maximum number of hotels to return.

    Returns:
        DataFrame with at most ``k`` rows (empty if ``df`` is empty).
    """
    city = extract_city(query, cities)

    city_mask = (df['locality'] == city) if city is not None else None
    if city_mask is not None and city_mask.any():
        df = df.loc[city_mask]
        print(f"City: {city}")
    else:
        print("No city found.")

    if df.empty:
        # np.stack cannot stack zero arrays; hand back an empty result that
        # still has the 'similarity' column callers select.
        return df.assign(similarity=np.array([], dtype=float))

    query_embedding = model.encode(query)
    embeddings_matrix = np.stack([_as_vector(v) for v in df['embedding'].values])
    similarities = cosine_similarity([query_embedding], embeddings_matrix).flatten()

    # assign() builds a new frame, so neither the caller's DataFrame nor a
    # filtered view of it is written to.
    scored = df.assign(similarity=similarities)

    # One row per hotel: the review that matches the query best.
    best_per_hotel = scored.loc[scored.groupby('hotel_name')['similarity'].idxmax()]

    top_k = best_per_hotel.nlargest(k, 'similarity')
    return top_k.sort_values(by=['result'], ascending=False)
def gr_request(query):
    """Gradio callback: return image/text pairs for the best-matching hotels.

    Runs the ranking for ``query`` against the module-level ``df``, ``model``
    and ``cities`` and flattens the result to
    ``[image_1, text_1, image_2, text_2, image_3, text_3]``.

    The list always has six entries. When fewer than three hotels are found
    (for example a city with only one or two hotels), the remaining slots are
    filled with ``None`` (image) and ``""`` (text), so the number of returned
    values always equals the number of output components.

    Args:
        query: Free-text user request.

    Returns:
        A flat list of six values alternating hotel image and description.
    """
    # Number of hotels shown; the interface wires up one image and one text
    # output per hotel, six components in total.
    top_k = 3

    ranked = filter_and_rank_by_similarity_sentiment_ranking(query, df, model, cities, top_k)
    rows = ranked[['hotel_name', 'locality', 'similarity', 'result', 'rate', 'review_text', "hotel_image"]].values

    output_list = []
    for hotel in rows:
        output_list.append(hotel[6])
        output_list.append(
            f"Hotel name: {hotel[0]} \nCity: {hotel[1]}\nSimilarity score: {round(hotel[2],2)} \nSentiment score: {round(hotel[3],2)} \nRate: {hotel[4]}/5.0 \n\n\nBest Match Review:\n{hotel[5]}"
        )

    # Pad missing hotels so every output component receives a value.
    missing = max(top_k - len(rows), 0)
    output_list.extend([None, ""] * missing)
    return output_list
# One text box in; three (image, text) pairs out — one pair per returned hotel.
demo = gr.Interface(
    fn=gr_request,
    inputs=['text'],
    outputs=['image', 'text'] * 3,
)
demo.launch()