indrasn0wal committed
Commit 2814bf2 · verified · 1 Parent(s): 86f1110

Update app.py

Files changed (1):
  1. app.py +12 -71
app.py CHANGED
@@ -2,70 +2,15 @@ import pandas as pd
 import numpy as np
 import torch
 from textblob import TextBlob
-data = pd.read_csv("flipkart_com-ecommerce_sample.csv")
-MAX_TEXT_LENGTH = 1000
-def auto_truncate(val):
-    """Truncate the given text."""
-    return val[:MAX_TEXT_LENGTH] if isinstance(val, str) else val
-all_prods_df = pd.read_csv("flipkart_com-ecommerce_sample.csv", converters={
-    'description': auto_truncate,
-    'product_specifications': auto_truncate,
-    'product_name': auto_truncate,
-    'product_category_tree': auto_truncate,
-})
-
-all_prods_df['product_specifications'].replace('', None, inplace=True)
-all_prods_df.dropna(subset=['product_specifications'], inplace=True)
-
-all_prods_df.reset_index(drop=True, inplace=True)
-NUMBER_PRODUCTS = 16000
-
-product_metadata = (
-    all_prods_df
-    .head(NUMBER_PRODUCTS)
-    .to_dict(orient='index')
-)
-texts = [
-    v['product_name'] for k, v in product_metadata.items()
-]
-
-metadatas = list(product_metadata.values())
 import openai
+import gradio as gr
+from pinecone import Pinecone
+from langdetect import detect
+
 openai.api_key = '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'
+pc = Pinecone(api_key="2c47d51e-211b-4611-8808-5510e07d1f94")
 def get_embedding(text, model="text-embedding-ada-002"):
     return openai.embeddings.create(input=[text], model=model).data[0].embedding
-
-embeddings = [get_embedding(text) for text in texts]
-from pinecone import Pinecone
-pc = Pinecone(api_key="2c47d51e-211b-4611-8808-5510e07d1f94", environment="us-east-1")
-index = pc.Index('zepto')
-vectors = []
-for i, (embedding, metadata) in enumerate(zip(embeddings, metadatas)):
-    vectors.append({
-        'id': str(i),
-        'values': embedding,
-        'metadata': {
-            'product_name': metadata.get('product_name', 'No name available'),
-            'product_url': metadata.get('product_url', 'No link available')
-        }
-    })
-import math
-
-def batch_upsert(index, vectors, batch_size=100):
-    """Upsert vectors to Pinecone in batches."""
-    num_batches = math.ceil(len(vectors) / batch_size)
-
-    for i in range(num_batches):
-        batch_start = i * batch_size
-        batch_end = batch_start + batch_size
-        batch_vectors = vectors[batch_start:batch_end]
-
-        index.upsert(vectors=batch_vectors)
-        print(f"Upserted batch {i + 1}/{num_batches}")
-
-batch_size = 50
-batch_upsert(index, vectors, batch_size=batch_size)
-from langdetect import detect
 def check_and_correct_spelling(query):
     blob = TextBlob(query)
     corrected_query = str(blob.correct())
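
The hunk above removes the one-off ingestion pipeline from app.py: loading the Flipkart CSV, truncating long text fields, embedding every product name, and batch-upserting the vectors into Pinecone no longer run at app startup. A minimal sketch of how that removed logic could live on as a standalone indexing script follows; the script name and the placeholder API keys are assumptions, while the CSV name, column handling, and the 'quickstart' index name match this commit.

# index_products.py -- illustrative offline ingestion script, not part of this commit
import math
import pandas as pd
import openai
from pinecone import Pinecone

MAX_TEXT_LENGTH = 1000

def auto_truncate(val):
    """Truncate long text fields, as the removed helper did."""
    return val[:MAX_TEXT_LENGTH] if isinstance(val, str) else val

# Same CSV and column handling as the code removed from app.py.
all_prods_df = pd.read_csv(
    "flipkart_com-ecommerce_sample.csv",
    converters={
        "description": auto_truncate,
        "product_specifications": auto_truncate,
        "product_name": auto_truncate,
        "product_category_tree": auto_truncate,
    },
)
all_prods_df["product_specifications"].replace("", None, inplace=True)
all_prods_df.dropna(subset=["product_specifications"], inplace=True)
all_prods_df.reset_index(drop=True, inplace=True)

NUMBER_PRODUCTS = 16000
product_metadata = all_prods_df.head(NUMBER_PRODUCTS).to_dict(orient="index")

openai.api_key = "..."        # placeholder; supply your own key
pc = Pinecone(api_key="...")  # placeholder; same Pinecone project as the app
index = pc.Index("quickstart")  # index name the updated app.py opens

def get_embedding(text, model="text-embedding-ada-002"):
    return openai.embeddings.create(input=[text], model=model).data[0].embedding

# Embed each product name and attach the metadata the app displays.
vectors = []
for i, meta in product_metadata.items():
    vectors.append({
        "id": str(i),
        "values": get_embedding(meta["product_name"]),
        "metadata": {
            "product_name": meta.get("product_name", "No name available"),
            "product_url": meta.get("product_url", "No link available"),
        },
    })

def batch_upsert(index, vectors, batch_size=100):
    """Upsert vectors to Pinecone in batches, as in the removed helper."""
    num_batches = math.ceil(len(vectors) / batch_size)
    for i in range(num_batches):
        index.upsert(vectors=vectors[i * batch_size:(i + 1) * batch_size])
        print(f"Upserted batch {i + 1}/{num_batches}")

batch_upsert(index, vectors, batch_size=50)
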
@@ -100,22 +45,18 @@ def is_query_relevant(query, relevant_keywords):
         if keyword.lower() in query.lower():
             return True
     return False
-def search_in_pinecone(query):
-    embedding = get_embedding(query)
-    search_result = index.query(vector=embedding, top_k=5, include_metadata=True)
-    return search_result
 def process_query(query):
     query = check_and_correct_spelling(query)
-
+
     query = correct_and_complete_query(query)
-
+
     query = translate_to_english(query)
-
+
     # Step 4: Check if the query is relevant
     # if not is_query_relevant(query):
     # return "The query is not relevant. Please enter a different query."
-
-    return query
+
+    return query
 def search_in_pinecone2(query):
     processed_query = process_query(query)
     embedding = get_embedding(query)
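
This hunk drops the unused search_in_pinecone helper and keeps process_query plus search_in_pinecone2 (next hunk) as the query path. As a reference for how the surviving pieces fit together, here is a small sketch that embeds the cleaned query and reads the top Pinecone matches; search_top_products is an illustrative name rather than a function in the commit, and unlike the committed search_in_pinecone2 it embeds the processed query instead of the raw one.

# Illustrative only: combines process_query, get_embedding and index.query from app.py.
def search_top_products(query, top_k=5):
    """Return (product_name, product_url, score) tuples for the best matches."""
    processed_query = process_query(query)       # spell-check, complete, translate
    embedding = get_embedding(processed_query)   # text-embedding-ada-002 vector
    response = index.query(vector=embedding, top_k=top_k, include_metadata=True)
    return [
        (m["metadata"].get("product_name"),      # dict-style access, as app.py uses result['score']
         m["metadata"].get("product_url"),
         m["score"])
        for m in response["matches"]
    ]

# Example: search_top_products("running shoes") returns up to 5 (name, url, score) tuples.
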
@@ -128,9 +69,9 @@ def search_in_pinecone2(query):
         score = result['score']
         result_string = f"Product: {product_name}\nLink: {product_link}\nScore: {score}\n"
         result_strings.append(result_string)
-
+
     return "\n".join(result_strings)
-import gradio as gr
+index = pc.Index('quickstart')
 interface = gr.Interface(
     fn=search_in_pinecone2,
     inputs=gr.Textbox(label="Enter your query"),
 
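The diff is cut off inside the gr.Interface(...) call, so the outputs, title, and launch step of the new app.py are not visible here. For context, a typical way such an interface is completed is sketched below; the outputs widget, title, and launch call are assumptions, not part of the commit.

# Assumed completion of the truncated gr.Interface call -- not taken from app.py.
import gradio as gr

interface = gr.Interface(
    fn=search_in_pinecone2,
    inputs=gr.Textbox(label="Enter your query"),
    outputs=gr.Textbox(label="Results"),  # plain text fits the "\n".join(...) return value
    title="Product search",               # illustrative title
)

if __name__ == "__main__":
    interface.launch()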