farhananis005 committed on
Commit
6db9247
1 Parent(s): 2733373

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +132 -0
  3. docs_db/index.faiss +3 -0
  4. docs_db/index.pkl +3 -0
  5. requirements.txt +10 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docs_db/index.faiss filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import openai
3
+
4
# Set TOKENIZERS_PARALLELISM to "false" before any model library is loaded
# (presumably to silence the HuggingFace tokenizers fork warning -- confirm).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Fail fast at start-up when the OpenAI key is absent. The original bare
# lookup raised KeyError as well; this keeps the exception type but says why.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError(
        "OPENAI_API_KEY is not set; export it before starting the app"
    )

# Shared conversation chain. It stays None until create_agent() builds it.
# (A module-level `global agent` statement is a no-op and defines nothing.)
agent = None
7
+
8
+
9
def create_agent():
    """Build the conversation chain and store it in the module-level ``agent``.

    The chain is a ``ConversationChain`` backed by the ``gpt-3.5-turbo-16k``
    chat model and a ``ConversationSummaryBufferMemory`` capped at 1000
    tokens. Calling this again replaces the previous chain together with its
    memory.

    Returns:
        The literal status string ``"Successful!"``, shown in the UI output box.
    """
    # Imported lazily so importing this module does not pull in langchain.
    from langchain.chat_models import ChatOpenAI
    from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
    from langchain.chains import ConversationChain

    global agent

    llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k")
    # The same model is passed to the memory object as its `llm`.
    memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1000)
    agent = ConversationChain(llm=llm, memory=memory, verbose=True)

    return "Successful!"
22
+
23
+
24
def formatted_response(docs, question, response, state):
    """Append the answer plus a list of its sources to the chat history.

    Args:
        docs: Iterable of retrieved documents. Each must expose a ``metadata``
            mapping; the optional keys ``"source"`` and ``"page"`` are read.
        question: The user's question, stored as the first tuple element.
        response: The model's answer text.
        state: Chat history as a list of ``(question, answer)`` tuples, or
            ``None`` to start a new history. The list is mutated in place.

    Returns:
        ``(state, state)`` -- the same list twice, one for each Gradio output
        wired to this handler.
    """
    if state is None:
        state = []

    lines = [response + "\n\nSources"]

    for doc in docs:
        source_info = doc.metadata.get("source") or "Unknown source"
        page_info = doc.metadata.get("page")

        # Keep only the final path component; str() guards against
        # non-string sources such as pathlib.Path objects.
        doc_name = str(source_info).split("/")[-1].strip()

        # `is not None` (not truthiness) so that page 0 is still reported.
        if page_info is not None:
            lines.append(f"{doc_name}\tpage no {page_info}")
        else:
            lines.append(doc_name)

    state.append((question, "\n".join(lines)))
    return state, state
41
+
42
+
43
def search_docs(prompt, question, state):
    """Answer ``question`` from the local FAISS index using the shared agent.

    The question is used for a similarity search over the index stored in
    ``/home/user/app/docs_db/``. The custom prompt, the question and the
    string form of the retrieved documents are then joined with blank lines
    and sent to the conversation chain created by ``create_agent``.

    Args:
        prompt: Custom instruction text placed before the question; ``None``
            is treated as an empty string.
        question: The user's question. Blank input is ignored.
        state: Chat history list of ``(question, answer)`` tuples, or ``None``.

    Returns:
        ``(state, state)`` -- the updated history for the chatbot and for the
        state component.
    """
    state = state or []

    # Nothing to search for: leave the conversation untouched.
    if not question or not question.strip():
        return state, state

    # The chain only exists once "Create Agent" has been clicked. Reading the
    # bare global would raise NameError before that, so look it up safely.
    chain = globals().get("agent")
    if chain is None:
        state.append(
            (
                question,
                'Agent has not been created yet. Open the "Upload and Process '
                'Documents" tab and click "Create Agent" first.',
            )
        )
        return state, state

    # Imported lazily, after the cheap guards above.
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.callbacks import get_openai_callback

    embeddings = OpenAIEmbeddings()
    # NOTE(review): the index is reloaded from disk on every query, and
    # load_local reads a pickle file -- only load indexes shipped with the app.
    docs_db = FAISS.load_local("/home/user/app/docs_db/", embeddings)
    docs = docs_db.similarity_search(question)

    full_prompt = "\n\n".join([prompt or "", question, str(docs)])

    with get_openai_callback() as cb:
        response = chain.predict(input=full_prompt)
        # Print the callback object (usage information for this call).
        print(cb)

    return formatted_response(docs, question, response, state)
68
+
69
+
70
+ import gradio as gr
71
+
72
# Centre the main column and cap its width at 75% of the page.
css = """
.col{
    max-width: 75%;
    margin: 0 auto;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}
"""

# NOTE(review): the nesting below is reconstructed from a flattened diff
# view -- confirm it against the original app.py.
with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>All in One Document Chatting App</center>")

    with gr.Tab("Chat With Your Documents"):
        with gr.Column(elem_classes="col"):

            with gr.Tab("Upload and Process Documents"):
                with gr.Column():

                    # docs_upload_input = gr.Files(label="Upload File(s)")
                    # docs_upload_button = gr.Button("Upload")
                    # docs_upload_output = gr.Textbox(label="Output")

                    # docs_process_button = gr.Button("Process")
                    # docs_process_output = gr.Textbox(label="Output")

                    create_agent_button = gr.Button("Create Agent")
                    create_agent_output = gr.Textbox(label="Output")

                    # gr.ClearButton([docs_upload_input, docs_upload_output, docs_process_output, create_agent_output])
                    gr.ClearButton([create_agent_output])

            with gr.Tab("Query Documents"):
                with gr.Column():

                    docs_prompt_input = gr.Textbox(label="Custom Prompt")

                    docs_chatbot = gr.Chatbot(label="Chats")
                    # Holds the chat history list between search clicks.
                    docs_state = gr.State()

                    docs_search_input = gr.Textbox(label="Question")
                    docs_search_button = gr.Button("Search")

                    gr.ClearButton([docs_prompt_input, docs_search_input])

    #########################################################################################################

    # docs_upload_button.click(save_docs, inputs=docs_upload_input, outputs=docs_upload_output)
    # docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)
    create_agent_button.click(create_agent, inputs=None, outputs=create_agent_output)

    # search_docs returns the history twice: once for the chatbot display and
    # once to store back into docs_state.
    docs_search_button.click(
        search_docs,
        inputs=[docs_prompt_input, docs_search_input, docs_state],
        outputs=[docs_chatbot, docs_state],
    )

    #########################################################################################################

demo.queue()
demo.launch()
docs_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8374c50d4d054a4d3fa22723361beb6d664979685497b9235ec4db5731d7fdc1
3
+ size 57489453
docs_db/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bc707921fd0cfceb0de06b536e584cd7b3c2746a521b915cf3a49a980b4d6f7
3
+ size 9341865
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ PyPDF2
3
+ pypdf
4
+ docx2txt
5
+ unstructured
6
+ gradio
7
+ faiss-cpu
8
+ openai
9
+ tiktoken
10
+