"""Gradio question-answering demo over a news-article CSV.

Pipeline:
  1. Load the articles from ``news_paper-Cleaned.csv``.
  2. Turn every row into a LangChain ``Document`` and split it into chunks.
  3. Embed the chunks with OpenAI embeddings and store them in an in-memory
     FAISS index.
  4. Answer user questions with a ``RetrievalQA`` chain on top of an MMR
     retriever, exposed through a minimal Gradio text interface.

The OpenAI credentials must be provided through the ``OPENAI_API_KEY``
environment variable.
"""
#!pip install -qU langchain-community faiss-cpu faiss-gpu langchain-openai sentence_transformers gradio
import os
from uuid import uuid4

import faiss
import gradio as gr
import numpy as np
import pandas as pd
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
#from sentence_transformers import SentenceTransformer

# SECURITY: an API key used to be hard-coded at this point.  Secrets must
# never live in source control -- revoke that key and supply a fresh one via
# the environment (e.g. `export OPENAI_API_KEY=...`) before running.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before "
        "running this script."
    )

df = pd.read_csv('news_paper-Cleaned.csv', encoding='utf-8', on_bad_lines='skip')

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Embed a throw-away string once to discover the embedding dimensionality
# that the flat L2 index has to be created with.
index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# One plain dict per CSV row, restricted to the columns that get indexed.
documents = [
    {
        'title': row['title'],
        'author': row['author'],
        'description': row['description'],
        'full_text': row['full_text'],
    }
    for _, row in df.iterrows()
]

# The textual form of each row dict is what gets chunked and embedded.
full_text = [
    Document(
        page_content=str(doc),
        metadata={"source": "news"},
    )
    for doc in documents
]

text_splitter = RecursiveCharacterTextSplitter(
    # Chunks of up to 1000 characters, overlapping by 100 characters.
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)
text_split = text_splitter.split_documents(full_text)

# One random id per chunk.
uuids = [str(uuid4()) for _ in text_split]
vector_store.add_documents(documents=text_split, ids=uuids)

retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10})

# Prompt wording is kept exactly as in the original script; it is split
# across adjacent literals purely for line length.
PROMPT_TEMPLATE = (
    " You are a helpful assistant that that can answer questions about"
    " specific data. You have answer only from this Context. You will"
    " receive 10 Answer return all and spilt between them by new line."
    " Question: {question} Context: {context} Answer: "
)
PROMPT = PromptTemplate(
    template=PROMPT_TEMPLATE,
    input_variables=['question', 'context'],
)

# Built once at start-up: constructing the LLM client and the chain for every
# incoming request is wasted work, since none of it depends on the query.
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT},
)


def questions(query: str) -> str:
    """Answer *query* using the indexed news articles.

    Args:
        query: Free-text question typed by the user.

    Returns:
        The text produced by the retrieval-QA chain, or a short hint when
        the query is empty or only whitespace.
    """
    # Skip the embedding/LLM round-trip when there is nothing to ask.
    if not query or not query.strip():
        return "Please enter a question."
    return qa_chain.invoke({"query": query})['result']


demo = gr.Interface(fn=questions, inputs="text", outputs="text")
demo.launch()