File size: 3,675 Bytes
2397ad2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
from langchain.callbacks import get_openai_callback
from PyPDF2 import PdfReader
import json
import openai
import streamlit as st
import os
import requests


# Streamlit page setup: browser-tab title, icon, and full-width layout.
st.set_page_config(
    page_title="PesaQ",
    page_icon="💸",
    layout="wide",
)

# Copy the OpenAI key from Streamlit's secrets store into the process
# environment (presumably where the OpenAI/LangChain clients used below
# look for it -- confirm against their configuration docs).
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]


# Sema Translator
def translate(userinput, target_lang, source_lang=None, *, timeout=60):
    """Translate ``userinput`` into ``target_lang`` via the remote translation service.

    Args:
        userinput: Text to translate.
        target_lang: Language code to translate into (callers in this file
            pass codes such as ``'eng_Latn'``).
        source_lang: Language code of ``userinput``. When falsy, the request
            is sent to the ``translate_detect`` endpoint and the
            ``source_language`` reported by the service is returned instead.
        timeout: Seconds to wait for the service before giving up, so a
            stalled endpoint cannot hang the app indefinitely.

    Returns:
        A ``(source_language, translated_text)`` tuple.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response JSON lacks an expected field.
    """
    base_url = "https://5d5c-44-208-85-154.ngrok-free.app"

    if source_lang:
        endpoint = "translate_enter"
        payload = {
            "userinput": userinput,
            "source_lang": source_lang,
            "target_lang": target_lang,
        }
    else:
        endpoint = "translate_detect"
        payload = {
            "userinput": userinput,
            "target_lang": target_lang,
        }

    response = requests.post(f"{base_url}/{endpoint}/", json=payload, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    result = response.json()

    # With an explicit source language the caller's value is echoed back;
    # otherwise use the language the service reports.
    detected_lang = source_lang if source_lang else result["source_language"]
    return detected_lang, result["translated_text"]


def main():
    """Render the PesaDoc page: upload a PDF, index it, and chat about its contents.

    Flow:
        1. The user uploads a PDF; its text is extracted, split into
           overlapping chunks, embedded with OpenAI and stored in a FAISS index.
        2. Each question is translated to English (``'eng_Latn'``) with
           ``translate``, answered by a "stuff" QA chain over the most similar
           chunks, and the answer is translated back to the language reported
           for the question.
        3. The conversation is kept in ``st.session_state.messages`` and
           replayed on every run.

    The FAISS index is cached in ``st.session_state`` keyed by a hash of the
    uploaded bytes, so the document is not re-parsed and re-embedded each time
    the script runs for a new chat message.
    """
    import hashlib  # local import: only needed to fingerprint the upload

    st.title("📚 PesaDoc")
    # upload file
    pdf = st.file_uploader("Upload a financial Document and ask questions to get insights", type="pdf")
    if pdf is None:
        return

    # Fingerprint the uploaded bytes so the index is rebuilt only when the
    # document actually changes.
    doc_key = hashlib.sha256(pdf.getvalue()).hexdigest()
    if "pdf_index" not in st.session_state or st.session_state["pdf_index"]["key"] != doc_key:
        # Extract the text; pages without a text layer contribute nothing.
        reader = PdfReader(pdf)
        pdf_text = "".join(page.extract_text() or "" for page in reader.pages)

        # Split into ~1000-character chunks with 200 characters of overlap.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        text_chunks = text_splitter.split_text(pdf_text)

        # Scanned/image-only PDFs yield no text, so there is nothing to
        # index; stop here rather than handing FAISS an empty list.
        if not text_chunks:
            st.warning("No extractable text was found in this PDF.")
            return

        # Embed the chunks with OpenAI and build the vector index.
        embeddings = OpenAIEmbeddings()
        st.session_state["pdf_index"] = {
            "key": doc_key,
            "index": FAISS.from_texts(text_chunks, embeddings),
        }
    pdf_embeddings = st.session_state["pdf_index"]["index"]

    # Replay the conversation so far.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if user_question := st.chat_input("Ask your document anything ......?"):
        with st.chat_message("user"):
            st.markdown(user_question)
        # Record the question before calling external services so it stays in
        # the history even if one of those calls fails.
        st.session_state.messages.append({"role": "user", "content": user_question})

        # Translate the question to English; the language reported for it is
        # reused below as the target for the answer.
        user_lang, english_query = translate(user_question, 'eng_Latn')

        docs = pdf_embeddings.similarity_search(english_query)
        chain = load_qa_chain(OpenAI(), chain_type="stuff")
        response = chain.run(input_documents=docs, question=english_query)

        # Answer in the user's language.
        output = translate(response, user_lang, 'eng_Latn')[1]
        with st.chat_message("assistant"):
            st.markdown(output)
        st.session_state.messages.append({"role": "assistant", "content": output})


# Entry point: launch the app when this file is executed as the main script.
if __name__ == "__main__":
    main()