File size: 3,832 Bytes
214e401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796c174
 
214e401
 
 
 
 
 
 
 
 
 
 
 
 
7960b73
214e401
 
0804e95
214e401
 
 
 
 
796c174
214e401
 
 
 
 
 
 
 
 
 
 
 
796c174
 
214e401
796c174
214e401
 
 
 
 
 
 
 
 
 
 
 
 
 
019841a
214e401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from dotenv import load_dotenv
import os
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback

def extract_text_from_pdf(pdf):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf: The PDF source handed straight to ``PdfReader``; ``main``
            passes the object returned by ``st.file_uploader``.

    Returns:
        The text of all pages, joined in page order with nothing inserted
        between pages. An empty string if no page yields any text.
    """
    pdf_reader = PdfReader(pdf)
    # Build the result with a single join instead of growing a string with
    # ``+=`` once per page.
    # NOTE(review): ``or ""`` guards against a page whose extract_text()
    # gives back nothing usable (e.g. None in some PyPDF2 versions) --
    # confirm against the installed version.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def extract_text_from_txt(txt):
    """Read a binary file-like object and decode its content as UTF-8 text.

    Args:
        txt: Object whose ``read()`` returns ``bytes``.

    Returns:
        The decoded text. A leading UTF-8 byte-order mark, if present, is
        dropped so it does not end up as a stray U+FEFF character at the
        start of the text.

    Raises:
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    raw = txt.read()
    # "utf-8-sig" decodes exactly like "utf-8" except that it strips an
    # optional leading BOM, which some editors write at the top of a file.
    return raw.decode("utf-8-sig")


def extract_text_from_brain():
    """Load the entire brain journal as a single string.

    Returns:
        The content of ``brain/brain_journal.txt`` (a path relative to the
        current working directory), read in text mode as UTF-8.
    """
    with open('brain/brain_journal.txt', 'r', encoding='utf-8') as journal:
        return journal.read()


def _load_source_text(file_type):
    """Collect the text to search for the chosen source type.

    For "PDF" and "TXT" this renders the matching upload widget and extracts
    the text of the uploaded file; for "Brain" it reads the journal through
    ``extract_text_from_brain``.

    Args:
        file_type: One of the selectbox options "Brain", "PDF" or "TXT".

    Returns:
        The extracted text, or None when there is nothing to search yet
        (no file uploaded, the journal file is missing, or the type is
        not recognised).
    """
    if file_type == "PDF":
        uploaded = st.file_uploader("Upload your PDF", type="pdf")
        return None if uploaded is None else extract_text_from_pdf(uploaded)
    if file_type == "TXT":
        uploaded = st.file_uploader("Upload your TXT", type="txt")
        return None if uploaded is None else extract_text_from_txt(uploaded)
    if file_type == "Brain":
        try:
            return extract_text_from_brain()
        except FileNotFoundError:
            # Show the problem in the page instead of crashing the app.
            st.error("The brain journal file could not be found.")
    return None


def _answer_questions(text):
    """Index *text* and render the question box, the answer and usage stats.

    Args:
        text: The full document text to split, embed and search.
    """
    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # With no chunks there is nothing to index or search, so stop here
        # instead of passing an empty list to FAISS.from_texts.
        st.warning("No readable text was found in the selected source.")
        return

    # create embeddings
    # NOTE(review): the index is rebuilt on every call of main(); consider
    # caching it if that turns out to be slow or costly.
    embeddings = OpenAIEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    # show user input
    user_question = st.text_area("Ask a question about your document:")
    if not st.button("Submit"):
        return
    if not user_question.strip():
        st.warning("Please enter a question before submitting.")
        return

    docs = knowledge_base.similarity_search(user_question)

    llm = OpenAI()
    chain = load_qa_chain(llm, chain_type="stuff")
    # The callback object collects the usage statistics that are shown
    # below the answer.
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_question)
        print(cb)

    st.markdown("### Response:")
    st.write(response)
    st.write(cb)


def main():
    """Render the Digital Brain Journal Search page.

    Shows the title and description, asks for an OpenAI API key, lets the
    user pick a text source (the journal, a PDF or a TXT upload), answers
    questions about that text, and always finishes with the footer links.
    """
    load_dotenv()
    hide_streamlit_style = """
                <style>
                footer {visibility: hidden;}
                </style>
                """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
    # The title ends with the magnifying-glass emoji (U+1F50D).
    st.title("Digital Brain Journal Search 🔍")
    st.write("Ask any questions about any of your journal entries with OpenAI's Embeddings and Langchain. The virtual brain keeps track of everything in a user's life. If you have another TXT or PDF file you'd like to search for answers, click on the dropdown and select either TXT or PDF option in file type. Along with the response, you will also get information about the amount of tokens that were used and the Total Cost of the query.")

    # Add API key input
    api_key = st.text_input("Enter your OpenAI API key:", type="password")

    if not api_key:
        st.warning("Please enter your OpenAI API key to continue.")
    else:
        # Export the key only once the user has supplied one, so an empty
        # input never overwrites a value already present in the environment.
        os.environ["OPENAI_API_KEY"] = api_key

        file_type = st.selectbox("Choose the file type", options=["Brain", "PDF", "TXT"])
        text = _load_source_text(file_type)
        if text is not None:
            _answer_questions(text)

    st.markdown("---")
    st.markdown("")
    st.markdown("<p style='text-align: center'><a href='https://github.com/Kaludii'>Github</a> | <a href='https://huggingface.co/Kaludi'>HuggingFace</a></p>", unsafe_allow_html=True)


# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()