"""Gradio demo that recommends hotels for a free-text query.

The query is embedded with a SentenceTransformer model and compared (cosine
similarity) against the pre-computed review embeddings stored in the pickled
DataFrame. When the query names a city that exists in the data, only hotels
from that city are considered.
"""
import pickle
import subprocess
import sys

# Runtime bootstrap: the installs must run BEFORE the third-party imports
# below, which is why those imports are not at the very top of the file.
# `sys.executable -m ...` guarantees the packages and the spaCy model are
# installed into the interpreter that is actually running this script.
# Failures are deliberately not fatal (no check=True): the packages may
# already be present.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-U", "pip", "setuptools", "wheel"]
)
subprocess.run([sys.executable, "-m", "pip", "install", "-U", "spacy"])
subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])

import gradio as gr  # noqa: E402
import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402,F401
import spacy  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402
from sklearn.metrics.pairwise import cosine_similarity  # noqa: E402
from transformers import pipeline  # noqa: E402,F401

# Number of hotels shown in the UI; the Interface below declares one
# (image, text) output pair per hotel.
TOP_K = 3

df = None
# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
# Only ship / load a pickle that you produced yourself.
with open('df_classifcation.pkl', 'rb') as file:
    df = pickle.load(file)

model = SentenceTransformer('all-MiniLM-L6-v2')
nlp = spacy.load("en_core_web_sm")
cities = df['locality'].unique()


def extract_city(query, cities):
    """Return the city mentioned in *query*, or None.

    spaCy named-entity recognition is run on the query and entities labelled
    "GPE" (geo-political entity) are inspected in order.

    Args:
        query: Free-text user query.
        cities: Iterable of known city names (values of the 'locality'
            column).

    Returns:
        The first GPE entity text that is one of *cities*. If no GPE entity
        is a known city, the first GPE entity found (the caller decides
        whether it is usable), or None when the query contains no GPE
        entity at all.
    """
    known_cities = set(cities)
    first_gpe = None
    for ent in nlp(query).ents:
        if ent.label_ != "GPE":
            continue
        if ent.text in known_cities:
            print(f"City found: {ent.text}")
            return ent.text
        if first_gpe is None:
            # Remember the first place name as a fallback so that a query
            # such as "France ... Paris" can still resolve to "Paris" later
            # in the loop.
            first_gpe = ent.text
    print("No city found.")
    return first_gpe


def filter_and_rank_by_similarity_sentiment_ranking(query, df, model, cities, k):
    """Return the *k* hotels whose reviews best match *query*.

    Steps:
      1. Try to detect a city in the query; if it is present in
         ``df['locality']`` restrict the search to that city.
      2. Score every review by cosine similarity between the query embedding
         and the row's 'embedding' value.
      3. Keep the single best-matching review per 'hotel_name'.
      4. Take the *k* most similar hotels and order them by the 'result'
         column, descending.

    Args:
        query: Free-text user query.
        df: DataFrame with at least the columns 'locality', 'embedding',
            'hotel_name' and 'result'. It is not modified.
        model: Object exposing ``encode(text)`` (a SentenceTransformer).
        cities: Known city names, forwarded to :func:`extract_city`.
        k: Maximum number of hotels to return.

    Returns:
        A new DataFrame with at most *k* rows and an added 'similarity'
        column.
    """
    city = None
    try:
        city = extract_city(query, cities)
    except Exception as exc:
        # City detection is best-effort: fall back to searching all cities,
        # but do not hide the reason.
        print(f"City extraction failed: {exc}")

    if city is not None and city in set(df['locality'].unique()):
        df = df.loc[df['locality'] == city]
        print(f"City: {city}")
    else:
        print("No city found.")

    if df.empty:
        # np.stack raises on an empty sequence; return an empty result that
        # still carries the 'similarity' column callers select.
        return df.assign(similarity=np.array([], dtype=float))

    query_embedding = model.encode(query)
    embeddings_matrix = np.stack(df['embedding'].values)
    similarities = cosine_similarity([query_embedding], embeddings_matrix).flatten()

    # assign() returns a new frame, so neither the caller's DataFrame nor a
    # filtered slice of it is written to.
    scored = df.assign(similarity=similarities)
    best_per_hotel = scored.loc[scored.groupby('hotel_name')['similarity'].idxmax()]
    top_k = best_per_hotel.nlargest(k, 'similarity')
    return top_k.sort_values(by=['result'], ascending=False)


def gr_request(query):
    """Gradio callback: return TOP_K (image, description) pairs for *query*.

    Returns:
        A flat list ``[image_1, text_1, image_2, text_2, ...]`` of length
        ``2 * TOP_K``. When fewer than TOP_K hotels match, the remaining
        slots are filled with ``(None, "")`` because Gradio requires one
        value per declared output component.
    """
    hotels = filter_and_rank_by_similarity_sentiment_ranking(
        query, df, model, cities, TOP_K
    )
    rows = hotels[
        ['hotel_name', 'locality', 'similarity', 'result', 'rate',
         'review_text', "hotel_image"]
    ].values

    output_list = []
    for name, locality, similarity, sentiment, rate, review, image in rows:
        output_list.append(image)
        output_list.append(
            f"Hotel name: {name} \nCity: {locality}\n"
            f"Similarity score: {round(similarity,2)} \n"
            f"Sentiment score: {round(sentiment,2)} \n"
            f"Rate: {rate}/5.0 \n\n\nBest Match Review:\n{review}"
        )

    # Pad so the number of returned values always matches the Interface.
    for _ in range(TOP_K - len(rows)):
        output_list.extend((None, ""))
    return output_list


demo = gr.Interface(
    fn=gr_request,
    inputs=["text"],
    outputs=['image', "text"] * TOP_K,
)
demo.launch()