Spaces:
Sleeping
Sleeping
File size: 3,832 Bytes
214e401 796c174 214e401 7960b73 214e401 0804e95 214e401 796c174 214e401 796c174 214e401 796c174 214e401 019841a 214e401 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
from dotenv import load_dotenv
import os
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
def extract_text_from_pdf(pdf):
    """Return the text of every page of *pdf*, concatenated in page order.

    *pdf* is passed unchanged to ``PdfReader``. The per-page strings are
    glued together with no separator between consecutive pages.
    """
    page_texts = (page.extract_text() for page in PdfReader(pdf).pages)
    return "".join(page_texts)
def extract_text_from_txt(txt):
    """Read an uploaded text file and return its contents as ``str``.

    Args:
        txt: A binary file-like object whose ``read()`` returns ``bytes``
            (``main`` passes the object returned by ``st.file_uploader``).

    Returns:
        The decoded text.

    Raises:
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # "utf-8-sig" decodes exactly like "utf-8" but drops a leading
    # byte-order mark, so files saved by BOM-writing editors do not leak a
    # stray U+FEFF into the first chunk of the searchable text.
    return txt.read().decode("utf-8-sig")
def extract_text_from_brain(path='brain/brain_journal.txt'):
    """Return the full contents of the brain journal text file.

    Args:
        path: Location of the journal file. Defaults to the journal the
            app has always read, ``brain/brain_journal.txt`` (resolved
            against the current working directory), so calling the
            function with no arguments behaves as before.

    Returns:
        The file's contents, decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    with open(path, 'r', encoding='utf-8') as journal:
        return journal.read()
def _load_source_text(file_type):
    """Return the text for the chosen source, or ``None`` if not ready.

    "Brain" reads the bundled journal immediately. "PDF" and "TXT" render
    a file uploader and yield ``None`` until the user has uploaded a file.
    """
    if file_type == "Brain":
        return extract_text_from_brain()
    if file_type == "PDF":
        uploaded = st.file_uploader("Upload your PDF", type="pdf")
        extractor = extract_text_from_pdf
    elif file_type == "TXT":
        uploaded = st.file_uploader("Upload your TXT", type="txt")
        extractor = extract_text_from_txt
    else:
        return None
    if uploaded is None:
        return None
    return extractor(uploaded)


def _build_knowledge_base(text):
    """Split *text* into overlapping chunks and index them with FAISS.

    Returns ``None`` when splitting produces no chunks, so the caller can
    warn the user instead of handing an empty list to ``FAISS.from_texts``.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        return None
    embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(chunks, embeddings)


def _get_knowledge_base(api_key, text):
    """Return the index for *text*, reusing the one built on a prior rerun.

    Streamlit re-executes the script on each widget interaction; without
    this cache every keystroke/submit would re-embed the whole document.
    The cache key includes the API key because the embeddings object is
    created while that key is the active ``OPENAI_API_KEY``.
    """
    cache_key = (api_key, text)
    cached = st.session_state.get("_knowledge_base_cache")
    if cached is not None and cached[0] == cache_key:
        return cached[1]
    knowledge_base = _build_knowledge_base(text)
    if knowledge_base is not None:
        st.session_state["_knowledge_base_cache"] = (cache_key, knowledge_base)
    return knowledge_base


def _run_search(api_key):
    """Render source selection and the question/answer flow."""
    file_type = st.selectbox("Choose the file type", options=["Brain", "PDF", "TXT"])
    text = _load_source_text(file_type)
    if text is None:
        # Nothing uploaded yet: only the uploader is shown.
        return

    knowledge_base = _get_knowledge_base(api_key, text)
    if knowledge_base is None:
        st.warning("No searchable text was found in the selected source.")
        return

    # show user input
    user_question = st.text_area("Ask a question about your document:")
    # The button is evaluated first so it is always rendered; blank or
    # whitespace-only questions are ignored.
    if st.button("Submit") and user_question.strip():
        docs = knowledge_base.similarity_search(user_question)
        llm = OpenAI()
        chain = load_qa_chain(llm, chain_type="stuff")
        # The callback collects token usage and cost for the chain run.
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=user_question)
            print(cb)
        st.markdown("### Response:")
        st.write(response)
        st.write(cb)


def main():
    """Run the Streamlit "Digital Brain Journal Search" page.

    Renders the page header, asks for an OpenAI API key, lets the user pick
    the bundled journal or an uploaded PDF/TXT file, indexes its text and
    answers a question about it, then renders the footer links.
    """
    load_dotenv()
    hide_streamlit_style = (
        "\n<style>\n"
        "footer {visibility: hidden;}\n"
        "</style>\n"
    )
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
    st.title("Digital Brain Journal Search π")
    st.write("Ask any questions about any of your journal entries with OpenAI's Embeddings and Langchain. The virtual brain keeps track of everything in a user's life. If you have another TXT or PDF file you'd like to search for answers, click on the dropdown and select eithter TXT or PDF option in file type. Along with the response, you will also get information about the amount of tokens that were used and the Total Cost of the query.")

    # Add API key input
    api_key = st.text_input("Enter your OpenAI API key:", type="password")
    if not api_key:
        st.warning("Please enter your OpenAI API key to continue.")
    else:
        # Export the key only once one was entered, so an empty input box
        # never overwrites a key that load_dotenv() placed in the
        # environment with an empty string.
        os.environ["OPENAI_API_KEY"] = api_key
        _run_search(api_key)

    st.markdown("---")
    st.markdown("")
    st.markdown("<p style='text-align: center'><a href='https://github.com/Kaludii'>Github</a> | <a href='https://huggingface.co/Kaludi'>HuggingFace</a></p>", unsafe_allow_html=True)
# Launch the app only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|