Spaces:
Sleeping
Sleeping
FALSHEIKHI
committed on
Commit
•
a79dc7d
1
Parent(s):
edd16da
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import spacy
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from sentence_transformers import SentenceTransformer
|
6 |
+
import spacy
|
7 |
+
from sentence_transformers import SentenceTransformer
|
8 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
+
import numpy as np
|
10 |
+
import pandas as pd
|
11 |
+
from transformers import pipeline
|
12 |
+
|
13 |
+
|
14 |
+
# Hotel review table. The code in this file reads the columns 'locality',
# 'hotel_name', 'embedding', 'result', 'rate', 'review_text' and 'hotel_image'.
# NOTE(review): 'embedding' is later passed to np.stack(); a CSV round-trip
# usually stores arrays as strings -- confirm the column holds numeric vectors.
df = pd.read_csv("df_end.csv")

# Let SentenceTransformer choose the device: with no explicit device it uses a
# GPU when one is available and falls back to CPU otherwise. The previous
# unconditional .to('cuda') crashed at startup on CPU-only hosts.
model = SentenceTransformer('all-MiniLM-L6-v2')

# spaCy pipeline used for named-entity recognition on the user query.
nlp = spacy.load("en_core_web_sm")

# Distinct values of the 'locality' column, used to validate detected cities.
cities = df['locality'].unique()
|
18 |
+
|
19 |
+
|
20 |
+
def extract_city(query, cities):
    """Return the city mentioned in *query*, or ``None`` if none is detected.

    The query is run through the module-level spaCy pipeline ``nlp`` and its
    entities labelled ``GPE`` (geo-political entity) are inspected in order.

    Args:
        query: Free-text user query.
        cities: Iterable of known city names. A ``GPE`` entity whose text is
            one of these is preferred over one that is not.

    Returns:
        The text of the first ``GPE`` entity that is a known city; if no
        ``GPE`` entity is a known city, the text of the first ``GPE`` entity;
        ``None`` when the query contains no ``GPE`` entity at all.
    """
    # Build the lookup once so each membership test is O(1).
    known_cities = set(cities) if cities is not None else set()

    first_gpe = None
    for ent in nlp(query).ents:
        if ent.label_ != "GPE":
            continue
        if ent.text in known_cities:
            return ent.text
        if first_gpe is None:
            # Remember the first hit as a fallback, matching the previous
            # behaviour when no entity is a known city.
            first_gpe = ent.text
    return first_gpe
|
34 |
+
|
35 |
+
def filter_and_rank_by_similarity_sentiment_ranking(query, df, model, cities, k):
    """Return the *k* hotels whose reviews best match *query*.

    Steps:
      1. Detect a city in the query with ``extract_city``; if it is one of the
         values in ``df['locality']``, restrict the search to that city.
      2. Embed the query with ``model.encode`` and compute its cosine
         similarity against every row's ``'embedding'``.
      3. Keep, for each ``'hotel_name'``, only the row with the highest
         similarity.
      4. Take the ``k`` most similar hotels and order them by the ``'result'``
         column, descending.

    Args:
        query: Free-text user query.
        df: DataFrame with at least the columns ``'locality'``,
            ``'embedding'``, ``'hotel_name'`` and ``'result'``. It is not
            modified.
        model: Object exposing ``encode(query)`` that returns the query vector.
        cities: Candidate city names, forwarded to ``extract_city``.
        k: Maximum number of hotels to return.

    Returns:
        A new DataFrame with at most ``k`` rows and an added ``'similarity'``
        column, sorted by ``'result'`` in descending order.
    """
    city = extract_city(query, cities)

    # Validate against the localities of the frame actually being searched,
    # not against the caller-supplied list.
    known_localities = df['locality'].unique()
    if city in known_localities:
        df = df.loc[df['locality'] == city]
        print(f"City: {city}")
    else:
        print("No city found.")

    query_embedding = model.encode(query)
    # NOTE(review): assumes every 'embedding' cell is a numeric vector of the
    # same length as the query embedding -- confirm how the column is loaded.
    embeddings_matrix = np.stack(df['embedding'].values)
    similarities = cosine_similarity([query_embedding], embeddings_matrix).flatten()

    # assign() returns a new frame, so the caller's DataFrame (the shared
    # module-level table in this app) is never mutated and no write happens
    # on a .loc slice.
    scored = df.assign(similarity=similarities)

    # One row per hotel: the review that matches the query best.
    best_per_hotel = scored.loc[scored.groupby('hotel_name')['similarity'].idxmax()]

    top_k_similar = best_per_hotel.nlargest(k, 'similarity')
    return top_k_similar.sort_values(by=['result'], ascending=False)
|
53 |
+
|
54 |
+
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
def gr_request(query):
    """Gradio handler: turn a text query into three (image, text) result pairs.

    Ranks hotels for *query* using the module-level ``df``, ``model`` and
    ``cities``, then flattens the top matches into the alternating
    ``[image, text, image, text, image, text]`` list that the Interface's
    output components expect.

    Args:
        query: Free-text user query.

    Returns:
        A list of exactly six items. For each matched hotel: the value of its
        ``'hotel_image'`` column followed by a formatted description. When
        fewer than three hotels match, the remaining slots are filled with
        ``None`` (image) and ``""`` (text).
    """
    # Must equal the number of image/text output pairs declared on the
    # gr.Interface at the bottom of this file.
    num_results = 3

    ranked = filter_and_rank_by_similarity_sentiment_ranking(query, df, model, cities, num_results)
    columns = ['hotel_name', 'locality', 'similarity', 'result', 'rate', 'review_text', "hotel_image"]
    rows = ranked[columns].values

    output_list = []
    for hotel in rows:
        output_list.append(hotel[6])
        output_list.append(
            f"Hotel name: {hotel[0]} \nCity: {hotel[1]}\nSimilarity score: {round(hotel[2],2)} \nSentiment score: {round(hotel[3],2)} \nRate: {hotel[4]}/5.0 \n\n\nBest Match Review:\n{hotel[5]}"
        )

    # Gradio raises an error when a handler returns fewer values than there
    # are output components, so pad the missing hotels with empty slots.
    for _ in range(num_results - len(rows)):
        output_list.append(None)
        output_list.append("")
    return output_list
|
72 |
+
|
73 |
+
|
74 |
+
# Gradio UI: one free-text query box in, three (image, text) result pairs out.
demo = gr.Interface(
    fn=gr_request,
    inputs=["text"],
    outputs=[
        "image", "text",
        "image", "text",
        "image", "text",
    ],
)

# Start the web server for the app.
demo.launch()
|