Spaces:

indrasn0wal
/

Zepto_chatbot

Runtime error

App Files Files Community

Zepto_chatbot / app.py

indrasn0wal

Update app.py

e886d9c verified 6 months ago

raw

history blame

5.11 kB

	import pandas as pd
	import numpy as np
	import torch
	from textblob import TextBlob
	# Raw, untruncated read of the product catalogue CSV.
	# NOTE(review): `data` is not referenced again in the visible code, while the
	# same file is parsed a second time into `all_prods_df` — confirm this extra
	# read is still needed.
	data = pd.read_csv("flipkart_com-ecommerce_sample.csv")
	# Maximum number of characters auto_truncate() keeps from a string value.
	MAX_TEXT_LENGTH = 1000
	def auto_truncate(val):
	    """Clip string values to at most MAX_TEXT_LENGTH characters.

	    Anything that is not a ``str`` is handed back untouched.
	    """
	    if not isinstance(val, str):
	        return val
	    return val[:MAX_TEXT_LENGTH]
	# Load the catalogue, clipping the long free-text columns while parsing.
	all_prods_df = pd.read_csv(
	    "flipkart_com-ecommerce_sample.csv",
	    converters={
	        'description': auto_truncate,
	        'product_specifications': auto_truncate,
	        'product_name': auto_truncate,
	        'product_category_tree': auto_truncate,
	    },
	)

	# Treat empty specification strings as missing and drop those rows.
	# The result is assigned back instead of calling replace(..., inplace=True)
	# on a column selection: that chained in-place form acts on a temporary
	# object under pandas Copy-on-Write and would leave the frame unchanged.
	# np.nan is used as the replacement because an explicit None value has been
	# interpreted as "forward-fill" by older pandas releases.
	all_prods_df['product_specifications'] = (
	    all_prods_df['product_specifications'].replace('', np.nan)
	)
	all_prods_df = all_prods_df.dropna(subset=['product_specifications'])

	# Renumber the surviving rows 0..n-1, discarding the old index.
	all_prods_df = all_prods_df.reset_index(drop=True)
	# Upper bound on how many catalogue rows are taken from the frame.
	NUMBER_PRODUCTS = 16000

	# Index label -> {column name: value} for the first NUMBER_PRODUCTS rows.
	product_metadata = all_prods_df.head(NUMBER_PRODUCTS).to_dict(orient='index')

	# One metadata dict per product, in row order.
	metadatas = list(product_metadata.values())

	# The product name of each row, in the same order as `metadatas`.
	texts = [row['product_name'] for row in metadatas]
	# The `openai` package must be installed before the script runs (for example
	# by listing it in requirements.txt). The notebook-style `!pip install` line
	# is not valid Python in a .py file and aborted the script with a SyntaxError.
	import os

	import openai

	# Read the key from the environment so that no secret lives in the source.
	# NOTE: the key that was previously committed here is public and should be
	# revoked.
	_openai_api_key = os.environ.get("OPENAI_API_KEY")
	if not _openai_api_key:
	    raise RuntimeError(
	        "Set the OPENAI_API_KEY environment variable before starting the app."
	    )
	openai.api_key = _openai_api_key
	def get_embedding(text, model="text-embedding-ada-002"):
	    """Return the embedding vector of ``text`` computed by ``model``.

	    The text is submitted to ``openai.embeddings.create`` as a one-element
	    list and the vector of the first returned item is handed back.
	    """
	    response = openai.embeddings.create(model=model, input=[text])
	    first_item = response.data[0]
	    return first_item.embedding

	# Embed every product name, keeping the order of `texts`.
	# NOTE(review): this issues one get_embedding() call per entry of `texts`
	# each time the script starts — consider batching or caching the vectors.
	embeddings = list(map(get_embedding, texts))
	# The `pinecone` package must be installed before the script runs (for
	# example by listing it in requirements.txt). The notebook-style
	# `!pip install` line is not valid Python in a .py file.
	import os

	from pinecone import Pinecone

	# Read the key from the environment so that no secret lives in the source.
	# NOTE: the key that was previously committed here is public and should be
	# revoked.
	_pinecone_api_key = os.environ.get("PINECONE_API_KEY")
	if not _pinecone_api_key:
	    raise RuntimeError(
	        "Set the PINECONE_API_KEY environment variable before starting the app."
	    )

	pc = Pinecone(api_key=_pinecone_api_key, environment="us-east-1")
	# Handle to the index that stores the product vectors.
	index = pc.Index('zepto')
	# One upsert record per product: the id is the product's position rendered as
	# a string, the values are its embedding, and only the name and URL are kept
	# as metadata (with placeholder text when a field is absent).
	vectors = [
	    {
	        'id': str(position),
	        'values': vector,
	        'metadata': {
	            'product_name': meta.get('product_name', 'No name available'),
	            'product_url': meta.get('product_url', 'No link available'),
	        },
	    }
	    for position, (vector, meta) in enumerate(zip(embeddings, metadatas))
	]
	import math

	def batch_upsert(index, vectors, batch_size=100):
	    """Upsert vectors to Pinecone in batches.

	    Args:
	        index: Object exposing ``upsert(vectors=...)``; called once per batch.
	        vectors: Sequence of records to upload; it is sliced, not modified.
	        batch_size: Maximum number of records sent per ``upsert`` call.

	    Raises:
	        ValueError: If ``batch_size`` is smaller than 1. Previously 0 failed
	            with a ZeroDivisionError and negative sizes uploaded nothing
	            without any warning.
	    """
	    if batch_size < 1:
	        raise ValueError(
	            f"batch_size must be a positive integer, got {batch_size!r}"
	        )

	    total = len(vectors)
	    num_batches = math.ceil(total / batch_size)

	    # Step through the sequence one batch at a time; the final slice is simply
	    # shorter when the total is not a multiple of batch_size.
	    starts = range(0, total, batch_size)
	    for batch_number, batch_start in enumerate(starts, start=1):
	        index.upsert(vectors=vectors[batch_start:batch_start + batch_size])
	        print(f"Upserted batch {batch_number}/{num_batches}")

	# Upload all product vectors, 50 records per upsert call (overriding the
	# default of 100 declared by batch_upsert).
	# NOTE(review): this runs at import time, so every start of the script
	# re-uploads the whole set — confirm that is intended.
	batch_size = 50
	batch_upsert(index, vectors, batch_size=batch_size)
	from langdetect import detect
	def check_and_correct_spelling(query):
	    """Return ``query`` after TextBlob's spelling correction, as a ``str``."""
	    return str(TextBlob(query).correct())
	def correct_and_complete_query(text):
	    """Spell-correct ``text`` and let an OpenAI completion model expand it.

	    The text is first run through TextBlob's ``correct()``; the corrected
	    string is then embedded in a product-search prompt sent to
	    ``gpt-3.5-turbo-instruct``. Returns the first completion choice with
	    surrounding whitespace stripped.
	    """
	    corrected_text = str(TextBlob(text).correct())

	    # Ask the model to turn the corrected text into a fuller search query.
	    completion = openai.completions.create(
	        prompt=f"Complete the following query in a way that is related to product search: '{corrected_text}'",
	        model="gpt-3.5-turbo-instruct",
	        temperature=0.5,
	        max_tokens=100,
	    )
	    first_choice = completion.choices[0]
	    return first_choice.text.strip()
	def translate_to_english(text):
	    """Return ``text`` in English, translating it with OpenAI when needed.

	    The language is guessed with ``langdetect.detect``. Text detected as
	    English is returned unchanged; anything else is sent to
	    ``gpt-3.5-turbo-instruct`` with a translation prompt and the stripped
	    first completion choice is returned.

	    If the language cannot be detected at all (langdetect raises for empty
	    or feature-less input such as digits only), the text is returned
	    unchanged instead of letting the exception abort the request.
	    """
	    from langdetect import LangDetectException

	    try:
	        language = detect(text)
	    except LangDetectException:
	        # Nothing to base a translation on; pass the text through untouched.
	        return text

	    if language == 'en':
	        return text

	    translation_prompt = f"Translate the following text to English:\n\n'{text}'"
	    response = openai.completions.create(
	        model="gpt-3.5-turbo-instruct",
	        prompt=translation_prompt,
	        max_tokens=100,
	        temperature=0.5,
	    )
	    return response.choices[0].text.strip()
	def is_query_relevant(query, relevant_keywords):
	    """Tell whether any keyword occurs in ``query``, ignoring case.

	    Matching is a plain substring test on the lower-cased strings. Returns
	    ``False`` when ``relevant_keywords`` is empty.
	    """
	    return any(
	        candidate.lower() in query.lower() for candidate in relevant_keywords
	    )
	def search_in_pinecone(query):
	    """Embed ``query`` and return the raw result of the index query.

	    The five nearest vectors are requested together with their metadata.
	    """
	    query_vector = get_embedding(query)
	    return index.query(vector=query_vector, top_k=5, include_metadata=True)
	def process_query(query):
	    """Run ``query`` through the preprocessing steps and return the result.

	    The steps are applied in this order, each receiving the previous step's
	    output: spelling correction, correction plus completion, and translation
	    to English. No relevance filtering is applied; an earlier
	    ``is_query_relevant`` check was left disabled.
	    """
	    pipeline = (
	        check_and_correct_spelling,
	        correct_and_complete_query,
	        translate_to_english,
	    )
	    for step in pipeline:
	        query = step(query)
	    return query
	def search_in_pinecone2(query):
	    """Search the product index for ``query`` and format the top matches.

	    The query is first cleaned up by ``process_query``; the processed text
	    (not the raw input) is then embedded and used for the lookup via
	    ``search_in_pinecone``, which requests the five nearest products.

	    Args:
	        query: Free-text search string typed by the user.

	    Returns:
	        One "Product / Link / Score" paragraph per match, joined by newlines.
	        A short prompt is returned instead when the input is empty or only
	        whitespace, so that no service is called with blank text.
	    """
	    if not query or not query.strip():
	        return "Please enter a query."

	    # Previously the processed query was computed and then ignored, with the
	    # raw input being embedded instead.
	    processed_query = process_query(query)
	    search_results = search_in_pinecone(processed_query)

	    result_strings = []
	    for match in search_results['matches']:
	        details = match['metadata']
	        product_name = details.get('product_name', 'No name available')
	        product_link = details.get('product_url', 'No link available')
	        score = match['score']
	        result_strings.append(
	            f"Product: {product_name}\nLink: {product_link}\nScore: {score}\n"
	        )

	    return "\n".join(result_strings)
	import gradio as gr
	# Web UI: a single text box in, a single text box out, wired to the
	# product search function.
	interface = gr.Interface(
	    title="Product Similarity Search",
	    description="Enter a query to find the top 5 similar products based on your search.",
	    fn=search_in_pinecone2,
	    inputs=gr.Textbox(label="Enter your query"),
	    outputs=gr.Textbox(label="Top 5 Similar Products"),
	)

	# Start serving the UI.
	interface.launch()