import pinecone
from tqdm.auto import tqdm

# initialise the connection to Pinecone (classic pinecone-client API;
# the credentials below are placeholders, replace with your own)
pinecone.init(
    api_key="YOUR_API_KEY",
    environment="YOUR_ENVIRONMENT"
)

index_name = "abstractive-question-answering"
# check if the abstractive-question-answering index exists
if index_name not in pinecone.list_indexes():
    # create the index if it does not exist
    pinecone.create_index(
        index_name,
        dimension=768,
        metric="cosine"
    )
# connect to the abstractive-question-answering index we created
index = pinecone.Index(index_name)
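
# NOTE: the upsert loop below assumes a pandas DataFrame `df` with a "passage_text"
# column and a `retriever` that encodes text into 768-dimensional vectors (matching
# the index dimension), both defined elsewhere in this app. A minimal sketch of such
# a retriever, with an example model name (an assumption, not necessarily the model
# this app uses):
# from sentence_transformers import SentenceTransformer
# retriever = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
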
# we will use batches of 64
batch_size = 64

for i in tqdm(range(0, len(df), batch_size)):
    # find end of batch
    i_end = min(i + batch_size, len(df))
    # extract batch
    batch = df.iloc[i:i_end]
    # generate embeddings for batch
    emb = retriever.encode(batch["passage_text"].tolist()).tolist()
    # get metadata
    meta = batch.to_dict(orient="records")
    # create unique IDs
    ids = [f"{idx}" for idx in range(i, i_end)]
    # add all to upsert list
    to_upsert = list(zip(ids, emb, meta))
    # upsert/insert these records to pinecone
    _ = index.upsert(vectors=to_upsert)

# check that we have all vectors in the index
index.describe_index_stats()
# from transformers import BartTokenizer, BartForConditionalGeneration
# # load the bart tokenizer and model from huggingface
# tokenizer = BartTokenizer.from_pretrained('vblagoje/bart_lfqa')
# generator = BartForConditionalGeneration.from_pretrained('vblagoje/bart_lfqa')
# def query_pinecone(query, top_k):
#     # generate embeddings for the query
#     xq = retriever.encode([query]).tolist()
#     # search the pinecone index for context passages containing the answer
#     xc = index.query(xq, top_k=top_k, include_metadata=True)
#     return xc
# def format_query(query, context):
#     # extract passage_text from the Pinecone search results and add the <P> tag
#     context = [f"<P> {m['metadata']['passage_text']}" for m in context]
#     # concatenate all context passages
#     context = " ".join(context)
#     # concatenate the query and context passages
#     query = f"question: {query} context: {context}"
#     return query
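# # A hedged usage sketch (not part of the original app) showing how the two helpers
# # above could be combined with the BART generator to produce an answer; the function
# # name and generation parameters below are illustrative assumptions:
# def generate_answer(query, top_k=3):
#     # retrieve relevant passages and build the prompt for the generator
#     result = query_pinecone(query, top_k=top_k)
#     prompt = format_query(query, result["matches"])
#     # tokenize the prompt and generate an abstractive answer with BART
#     inputs = tokenizer([prompt], max_length=1024, truncation=True, return_tensors="pt")
#     ids = generator.generate(inputs["input_ids"], num_beams=2, min_length=20, max_length=64)
#     return tokenizer.batch_decode(ids, skip_special_tokens=True)[0]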