File size: 1,505 Bytes
83bd7b4
accf30e
 
 
 
 
 
 
 
 
 
 
 
 
82ae4d8
accf30e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28a53d3
 
ca2f8d5
 
 
 
 
 
 
 
 
 
 
 
24c237f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
from datasets import load_dataset
import os
import gradio as gr
from transformers import pipeline
from datasets import DatasetDict

from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

# --- Startup pipeline -------------------------------------------------------
# Reads the book PDF, splits its text into overlapping chunks, embeds the
# chunks into a FAISS index (`docsearch`) and prepares the question-answering
# chain (`chain`) used by the search handler further down in this file.

# SECURITY: the OpenAI key must be supplied through the process environment
# (for example as a deployment secret). It used to be hard-coded on this line;
# any key that was ever committed to the repository should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it (or configure it as a "
        "deployment secret) before starting the app."
    )

# NOTE: a `load_dataset("orderlymirror/The_48_Laws_Of_Power")` call used to be
# assigned to `reader` here and was overwritten on the very next line, so it
# only cost a network download at startup. The text actually comes from the
# local PDF below.
reader = PdfReader('48lawsofpower.pdf')

# Pages with no extractable text (extract_text() returns something falsy)
# are skipped; the remaining page texts are concatenated in page order.
raw_text = "".join(
    page_text
    for page_text in (page.extract_text() for page in reader.pages)
    if page_text
)

# Split on newlines into chunks of at most ~1000 characters, with 200
# characters of overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)
if not texts:
    # Fail with an actionable message instead of an obscure error from the
    # vector store when there is nothing to index.
    raise ValueError(
        "No text could be extracted from '48lawsofpower.pdf'; "
        "cannot build the search index."
    )

embeddings = OpenAIEmbeddings()

# Vector index over the chunks, queried by similarity at request time.
docsearch = FAISS.from_texts(texts, embeddings)

# "stuff" chain type: the retrieved documents are passed to the LLM together
# with the question in a single call.
chain = load_qa_chain(OpenAI(), chain_type="stuff")

def interactive_search(query):
    """Answer ``query`` using the passages of the index most similar to it.

    The module-level ``docsearch`` index is asked for the documents closest
    to ``query``; those documents and the query itself are then handed to the
    module-level ``chain``, whose output is returned unchanged.
    """
    matching_docs = docsearch.similarity_search(query)
    return chain.run(input_documents=matching_docs, question=query)

# Gradio front end: one text box in, one text box out, backed by
# interactive_search.
iface = gr.Interface(
    fn=interactive_search,
    inputs="text",
    outputs="text",
    title="Interactive Search",
    description="Enter a query.",
)

# Start serving the interface.
iface.launch()