File size: 2,609 Bytes
a79dc7d
 
 
 
 
 
 
 
 
2f9807e
a79dc7d
ca80d3f
 
 
1fd474e
ca80d3f
 
 
 
 
ffa9038
a79dc7d
 
 
1fd474e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a79dc7d
 
a3c3c9c
 
1fd474e
e9c8e3e
 
a79dc7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import gradio as gr
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
from transformers import pipeline
import pickle

# Load the pre-computed review DataFrame. The rest of this file reads at
# least its 'locality' and 'embedding' columns.
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file. Only load a pickle that ships with the app, never user-supplied data.
with open('df_classifcation.pkl', 'rb') as file:
    df = pickle.load(file)

# Sentence-embedding model used to encode incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

import spacy

# spaCy English pipeline; its named entities are used to spot place names.
nlp = spacy.load("en_core_web_sm")

# Distinct locality values present in the dataset (computed once).
cities = df['locality'].unique()
def extract_city(query, cities):
    """Return the place name mentioned in *query*, or ``None``.

    The query is run through the module-level spaCy pipeline ``nlp`` and
    every entity labelled ``GPE`` (geo-political entity) is collected. An
    entity that also appears in *cities* is preferred; if none of them is a
    known city, the first ``GPE`` entity is returned unchanged so the caller
    can decide how to treat an unknown place.

    Args:
        query: Free-text user query.
        cities: Iterable of known city names; ``None`` is treated as empty.

    Returns:
        The matched place name, or ``None`` when the query contains no
        ``GPE`` entity.
    """
    doc = nlp(query)
    places = [ent.text for ent in doc.ents if ent.label_ == "GPE"]
    if not places:
        print("No city found.")
        return None

    # A set gives a plain boolean membership test regardless of whether
    # *cities* arrives as a list or a NumPy array.
    known = set(cities) if cities is not None else set()
    for place in places:
        if place in known:
            print(f"City found: {place}")
            return place

    # GPE also covers countries and states, so none of the hits may be a
    # known city; fall back to the first hit as the original code did.
    return places[0]

    



def filter_and_rank_by_similarity_sentiment_ranking(query, df, model,cities, k):
    """Return the *k* hotels whose reviews best match *query*.

    Steps:
      1. Try to detect a city in the query; if it occurs in
         ``df['locality']`` only rows of that city are kept.
      2. Score every remaining row by cosine similarity between the encoded
         query and the row's stored ``'embedding'``.
      3. Keep the best-scoring row per ``'hotel_name'``.
      4. Take the *k* most similar hotels and order them by the ``'result'``
         column, highest first.

    Args:
        query: Free-text user query.
        df: DataFrame with at least the columns ``'locality'``,
            ``'embedding'``, ``'hotel_name'`` and ``'result'``. It is not
            modified.
        model: Object exposing ``encode(text)`` that returns the query
            embedding.
        cities: Known city names, forwarded to ``extract_city``.
        k: Maximum number of hotels to return.

    Returns:
        A new DataFrame of at most *k* rows with an added ``'similarity'``
        column.
    """
    if df.empty:
        # np.stack cannot handle zero arrays; nothing to rank anyway.
        return df.assign(similarity=np.empty(0, dtype=float))

    city = None
    try:
        city = extract_city(query, cities)
    except Exception as exc:
        # City detection is best-effort: on failure rank over all cities,
        # but report the problem instead of hiding it.
        print(f"City extraction failed: {exc}")

    city_rows = (df['locality'] == city) if city is not None else None
    if city_rows is not None and city_rows.any():
        df = df.loc[city_rows]
        print(f"City: {city}")
    else:
        print("No city found.")

    query_embedding = model.encode(query)
    embeddings_matrix = np.stack(df['embedding'].values)
    similarities = cosine_similarity([query_embedding], embeddings_matrix).flatten()

    # assign() builds a new frame, so neither the caller's DataFrame nor a
    # filtered slice of it is written to.
    scored = df.assign(similarity=similarities)
    best_per_hotel = scored.loc[scored.groupby('hotel_name')['similarity'].idxmax()]

    top_k = best_per_hotel.nlargest(k, 'similarity')
    return top_k.sort_values(by=['result'], ascending=False)







def gr_request(query):
    """Gradio callback: look up the best-matching hotels for *query*.

    Args:
        query: Free-text user query from the input textbox.

    Returns:
        A flat list of exactly six items, alternating image and description
        text for up to three hotels. When fewer than three hotels are found
        the remaining slots are filled with ``None``.
    """
    top_k = 3  # number of (image, text) output pairs the UI displays
    result = filter_and_rank_by_similarity_sentiment_ranking(query, df, model, cities, top_k)
    rows = result[['hotel_name','locality','similarity','result','rate','review_text',"hotel_image"]].values

    output_list = []
    for name, locality, similarity, sentiment, rate, review, image in rows:
        output_list.append(image)
        output_list.append(
            f"Hotel name: {name} \nCity: {locality}\nSimilarity score: {round(similarity,2)} \nSentiment score: {round(sentiment,2)} \nRate: {rate}/5.0 \n\n\nBest Match Review:\n{review}"
        )

    # The interface expects a fixed number of return values; pad the unused
    # (image, text) pairs with None so a short result list does not error.
    missing_pairs = max(top_k - len(rows), 0)
    output_list.extend([None, None] * missing_pairs)
    return output_list


# Web UI: one free-text input, three (image, text) output pairs.
demo = gr.Interface(
    fn=gr_request,
    inputs=["text"],
    outputs=['image', "text"] * 3,
)
demo.launch()