text_gen2 / app.py
orderlymirror's picture
Update app.py
ca2f8d5 verified
raw
history blame
1.51 kB
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
from datasets import load_dataset
import os
import gradio as gr
from transformers import pipeline
from datasets import DatasetDict
# SECURITY: the OpenAI key must never be committed to source control. It is
# read from the process environment instead (e.g. a deployment secret). Any
# key that was ever hard-coded in this file should be treated as leaked and
# revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment before "
        "starting the app."
    )
# Read the book from the local PDF. No Hub dataset is loaded here: a dataset
# object bound to `reader` would be replaced by this PdfReader before ever
# being used, so fetching it would only add start-up time.
reader = PdfReader('48lawsofpower.pdf')

# Concatenate the extracted text of every page, skipping pages whose
# extraction yields nothing (None or an empty string). Joining once avoids
# rebuilding the whole string on every page.
page_texts = (page.extract_text() for page in reader.pages)
raw_text = ''.join(chunk for chunk in page_texts if chunk)
# Chunking configuration: split on newlines into pieces of up to 1000
# characters (length measured with len) with a 200-character overlap.
splitter_options = {
    "separator": "\n",
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "length_function": len,
}
text_splitter = CharacterTextSplitter(**splitter_options)
texts = text_splitter.split_text(raw_text)

# Embed the chunks with OpenAI embeddings and index them in FAISS so that
# queries can be matched by similarity.
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(texts, embeddings)
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

# Question-answering chain built on an OpenAI LLM with default settings.
# NOTE(review): chain_type="stuff" presumably places all retrieved documents
# into a single prompt -- confirm against the LangChain docs.
llm = OpenAI()
chain = load_qa_chain(llm, chain_type="stuff")
def interactive_search(query: str) -> str:
    """Answer *query* from the indexed document.

    Looks up the chunks most similar to the query in the module-level
    ``docsearch`` index and runs the module-level ``chain`` over them.

    Args:
        query: The question typed by the user.

    Returns:
        The chain's answer, or a short prompt asking for input when the
        query is empty or contains only whitespace.
    """
    # Guard: a blank query carries nothing to search for, so skip the
    # embedding lookup and the LLM call entirely.
    if not query or not query.strip():
        return "Please enter a query."

    docs = docsearch.similarity_search(query)
    return chain.run(input_documents=docs, question=query)
# Gradio UI: a single text input whose value is passed to interactive_search,
# with the returned string shown in a text output.
iface = gr.Interface(
    fn=interactive_search,
    inputs="text",
    outputs="text",
    title="Interactive Search",
    description="Enter a query.",
)
iface.launch()