Spaces:
Sleeping
Sleeping
singhjagpreet
committed on
Commit
•
5b3feea
1
Parent(s):
c0c01c6
updated readme
Browse files- README.md +66 -0
- app.py +18 -6
- chainlit.md +7 -10
- requirements.txt +1 -1
- src/config.py +12 -1
- src/model.py +12 -6
- src/utils.py +5 -5
README.md
CHANGED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Document Reader
|
2 |
+
|
3 |
+
Don't waste time reading lengthy Terms and Conditions! Upload your files here and ask anything you want to know.
|
4 |
+
|
5 |
+
## Introduction
|
6 |
+
|
7 |
+
Welcome to Document Reader! This chatbot is designed to assist users with questions related to uploaded files. Users can upload text documents and inquire about their content.
|
8 |
+
|
9 |
+
## Features
|
10 |
+
|
11 |
+
- **File Upload:** Users can upload files (text/pdf).
|
12 |
+
- **Question and Answer:** Ask questions related to the content of the uploaded file.
|
13 |
+
- **Interactive Assistance:** Receive information and insights based on the uploaded file.
|
14 |
+
|
15 |
+
## Getting Started
|
16 |
+
|
17 |
+
Follow these steps to get started with Document Reader:
|
18 |
+
|
19 |
+
1. Clone the repository:
|
20 |
+
|
21 |
+
```bash
|
22 |
+
git clone https://github.com/SinghJagpreet096/Document-Reader.git
|
23 |
+
cd Document-Reader
|
24 |
+
|
25 |
+
```
|
26 |
+
2. Create Virtual Environment:
|
27 |
+
```bash
|
28 |
+
python -m venv <env-name>
|
29 |
+
```
|
30 |
+
|
31 |
+
3. Activate venv:
|
32 |
+
```bash
|
33 |
+
source <env-name>/bin/activate
|
34 |
+
```
|
35 |
+
4. Install dependencies:
|
36 |
+
|
37 |
+
```bash
|
38 |
+
pip install -r requirements.txt
|
39 |
+
```
|
40 |
+
|
41 |
+
5. Run your app:
|
42 |
+
|
43 |
+
```bash
|
44 |
+
chainlit run app.py
|
45 |
+
```
|
46 |
+
|
47 |
+
## Usage
|
48 |
+
|
49 |
+
1. Open Document Reader.
|
50 |
+
2. Click on the file upload button to share a document.
|
51 |
+
3. Once the file is uploaded, ask questions about its content.
|
52 |
+
4. The chatbot will provide information or insights based on the uploaded file.
|
53 |
+
|
54 |
+
Feel free to type "help" at any time for assistance.
|
55 |
+
|
56 |
+
## Configuration
|
57 |
+
|
58 |
+
The chatbot may require configuration through environment variables. Check the `.env` file for details.
|
59 |
+
|
60 |
+
## Create a `.env` file
|
61 |
+
```bash
|
62 |
+
echo "OPENAI_API_KEY=<your-openai-api-key>" > .env
|
63 |
+
```
|
64 |
+
You can create an OpenAI API key at https://platform.openai.com/api-keys
|
65 |
+
|
66 |
+
|
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import os
|
2 |
import logging
|
3 |
|
4 |
-
|
5 |
#export HNSWLIB_NO_NATIVE = 1
|
6 |
|
7 |
from langchain.document_loaders import PyPDFDirectoryLoader, TextLoader
|
@@ -12,22 +12,34 @@ from langchain.memory import ChatMessageHistory, ConversationBufferMemory
|
|
12 |
from langchain.chains import ConversationalRetrievalChain
|
13 |
from langchain.chat_models import ChatOpenAI
|
14 |
import chainlit as cl
|
15 |
-
|
16 |
from src.utils import get_docsearch, get_source
|
17 |
|
18 |
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
19 |
# embeddings = OpenAIEmbeddings()
|
20 |
|
21 |
-
welcome_message = """Welcome
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
|
24 |
@cl.on_chat_start
|
25 |
async def start():
|
26 |
-
await cl.Message("
|
27 |
files = None
|
28 |
files = await cl.AskFileMessage(
|
29 |
content=welcome_message,
|
30 |
accept=["text/plain", "application/pdf"],
|
|
|
|
|
31 |
).send()
|
32 |
|
33 |
logging.info("file uploaded")
|
@@ -53,8 +65,8 @@ async def start():
|
|
53 |
|
54 |
## create chain that uses chroma vector store
|
55 |
chain = ConversationalRetrievalChain.from_llm(
|
56 |
-
ChatOpenAI(model_name=
|
57 |
-
chain_type=
|
58 |
retriever=docsearch.as_retriever(),
|
59 |
memory=memory,
|
60 |
return_source_documents=True,
|
|
|
1 |
import os
|
2 |
import logging
|
3 |
|
4 |
+
|
5 |
#export HNSWLIB_NO_NATIVE = 1
|
6 |
|
7 |
from langchain.document_loaders import PyPDFDirectoryLoader, TextLoader
|
|
|
12 |
from langchain.chains import ConversationalRetrievalChain
|
13 |
from langchain.chat_models import ChatOpenAI
|
14 |
import chainlit as cl
|
15 |
+
from src.config import Config
|
16 |
from src.utils import get_docsearch, get_source
|
17 |
|
18 |
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
19 |
# embeddings = OpenAIEmbeddings()
|
20 |
|
21 |
+
welcome_message = """Welcome to Your Document Reader!
|
22 |
+
|
23 |
+
Here to assist you with any questions you have about a file. You can upload a file and ask me questions related to its content. Here's how you can get started:
|
24 |
+
|
25 |
+
1. Click on the file upload button to share a document or image.
|
26 |
+
2. Once the file is uploaded, feel free to ask me any questions about its content.
|
27 |
+
3. I'll do my best to provide information or insights based on the uploaded file.
|
28 |
+
|
29 |
+
If you need help or have any specific queries, type "help" at any time.
|
30 |
+
|
31 |
+
Let's get the conversation started! """
|
32 |
|
33 |
|
34 |
@cl.on_chat_start
|
35 |
async def start():
|
36 |
+
await cl.Message("YOU ARE IN").send()
|
37 |
files = None
|
38 |
files = await cl.AskFileMessage(
|
39 |
content=welcome_message,
|
40 |
accept=["text/plain", "application/pdf"],
|
41 |
+
max_size_mb=Config.max_size_mb,
|
42 |
+
timeout=Config.timeout
|
43 |
).send()
|
44 |
|
45 |
logging.info("file uploaded")
|
|
|
65 |
|
66 |
## create chain that uses chroma vector store
|
67 |
chain = ConversationalRetrievalChain.from_llm(
|
68 |
+
ChatOpenAI(model_name=Config.model_name,temperature=Config.temperature, streaming=Config.streaming),
|
69 |
+
chain_type=Config.chain_type,
|
70 |
retriever=docsearch.as_retriever(),
|
71 |
memory=memory,
|
72 |
return_source_documents=True,
|
chainlit.md
CHANGED
@@ -1,14 +1,11 @@
|
|
1 |
-
|
2 |
|
3 |
-
|
4 |
|
5 |
-
|
|
|
|
|
6 |
|
7 |
-
|
8 |
-
- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
## Welcome screen
|
13 |
-
|
14 |
-
To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
|
|
|
1 |
+
Welcome to Your Document Reader!
|
2 |
|
3 |
+
Here to assist you with any questions you have about a file. You can upload a file and ask me questions related to its content. Here's how you can get started:
|
4 |
|
5 |
+
1. Click on the file upload button to share a document (plain text or PDF).
|
6 |
+
2. Once the file is uploaded, feel free to ask me any questions about its content.
|
7 |
+
3. I'll do my best to provide information or insights based on the uploaded file.
|
8 |
|
9 |
+
If you need help or have any specific queries, type "help" at any time.
|
|
|
10 |
|
11 |
+
Let's get the conversation started!
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -4,4 +4,4 @@ pypdf
|
|
4 |
chainlit
|
5 |
openai
|
6 |
chromadb
|
7 |
-
tiktoken
|
|
|
4 |
chainlit
|
5 |
openai
|
6 |
chromadb
|
7 |
+
tiktoken
|
src/config.py
CHANGED
@@ -1,13 +1,24 @@
|
|
1 |
import os
|
2 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
4 |
|
5 |
|
6 |
|
7 |
class Config:
|
8 |
temperature = 0
|
9 |
streaming = True
|
|
|
|
|
10 |
chain_type = "stuff"
|
11 |
max_token_limit = 4098
|
12 |
embeddings = OpenAIEmbeddings()
|
13 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
+
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
|
5 |
|
6 |
|
7 |
|
8 |
class Config:
    """Central settings and shared LangChain objects for the app.

    Everything here is a class attribute, so each object below is created
    once, when this class body is executed at import time.
    """

    # --- chat model settings (passed to ChatOpenAI) ---
    temperature = 0
    streaming = True

    # --- file upload prompt settings (passed to cl.AskFileMessage) ---
    max_size_mb = 20
    timeout = 180

    # --- retrieval chain settings ---
    chain_type = "stuff"
    # NOTE(review): not referenced in the code visible here -- confirm it is
    # still needed.
    max_token_limit = 4098

    # --- document processing ---
    embeddings = OpenAIEmbeddings()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )

    model_name = "gpt-3.5-turbo"

    # --- conversation memory ---
    # NOTE(review): these are class attributes, so one history/memory object
    # is shared by everything that reads Config.memory -- confirm this is
    # intended when several chat sessions run in the same process.
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )
|
src/model.py
CHANGED
@@ -2,6 +2,7 @@ from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesCha
|
|
2 |
from langchain.chat_models import ChatOpenAI
|
3 |
import logging
|
4 |
import os
|
|
|
5 |
|
6 |
|
7 |
from src.config import Config
|
@@ -11,16 +12,21 @@ from src.config import Config
|
|
11 |
|
12 |
|
13 |
def load_model():
|
14 |
-
model = ChatOpenAI(
|
15 |
-
|
|
|
16 |
return model
|
17 |
|
18 |
|
19 |
def load_chain(docsearch):
|
20 |
model = load_model()
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
25 |
)
|
|
|
26 |
return chain
|
|
|
2 |
from langchain.chat_models import ChatOpenAI
|
3 |
import logging
|
4 |
import os
|
5 |
+
from langchain.chains import ConversationalRetrievalChain
|
6 |
|
7 |
|
8 |
from src.config import Config
|
|
|
12 |
|
13 |
|
14 |
def load_model():
    """Build the chat model from the settings held in ``Config``.

    Returns:
        A ``ChatOpenAI`` instance created with ``Config.model_name``,
        ``Config.temperature`` and ``Config.streaming``.
    """
    return ChatOpenAI(
        model_name=Config.model_name,
        temperature=Config.temperature,
        streaming=Config.streaming,
    )
|
19 |
|
20 |
|
21 |
def load_chain(docsearch):
    """Create the conversational retrieval chain for a document index.

    Args:
        docsearch: Vector store for the uploaded document; only its
            ``as_retriever()`` method is used here.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm`` using
        the model from ``load_model()``, ``Config.chain_type`` and
        ``Config.memory``, with source documents included in the result.
    """
    model = load_model()

    # Pass the model *instance*: the previous code handed over the
    # ``load_model`` function object itself, which is not a language model.
    chain = ConversationalRetrievalChain.from_llm(
        model,
        chain_type=Config.chain_type,
        retriever=docsearch.as_retriever(),
        memory=Config.memory,
        return_source_documents=True,
    )

    return chain
|
src/utils.py
CHANGED
@@ -6,13 +6,13 @@ from langchain.vectorstores import Chroma
|
|
6 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
8 |
import chainlit as cl
|
9 |
-
|
10 |
|
11 |
from src.config import Config
|
12 |
import logging
|
13 |
|
14 |
-
text_splitter = RecursiveCharacterTextSplitter()
|
15 |
-
embeddings = OpenAIEmbeddings()
|
16 |
|
17 |
def process_file(file: AskFileResponse):
|
18 |
import tempfile
|
@@ -26,7 +26,7 @@ def process_file(file: AskFileResponse):
|
|
26 |
tempfile.write(file.content)
|
27 |
loader = Loader(tempfile.name)
|
28 |
documents = loader.load()
|
29 |
-
docs = text_splitter.split_documents(documents)
|
30 |
for i, doc in enumerate(docs):
|
31 |
doc.metadata["source"] = f"source_{i}"
|
32 |
return docs
|
@@ -40,7 +40,7 @@ def get_docsearch(file: AskFileResponse):
|
|
40 |
# Create a unique namespace for the file
|
41 |
|
42 |
docsearch = Chroma.from_documents(
|
43 |
-
docs, embeddings
|
44 |
)
|
45 |
return docsearch
|
46 |
|
|
|
6 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
8 |
import chainlit as cl
|
9 |
+
from src.config import Config
|
10 |
|
11 |
from src.config import Config
|
12 |
import logging
|
13 |
|
14 |
+
# text_splitter = RecursiveCharacterTextSplitter()
|
15 |
+
# embeddings = OpenAIEmbeddings()
|
16 |
|
17 |
def process_file(file: AskFileResponse):
|
18 |
import tempfile
|
|
|
26 |
tempfile.write(file.content)
|
27 |
loader = Loader(tempfile.name)
|
28 |
documents = loader.load()
|
29 |
+
docs = Config.text_splitter.split_documents(documents)
|
30 |
for i, doc in enumerate(docs):
|
31 |
doc.metadata["source"] = f"source_{i}"
|
32 |
return docs
|
|
|
40 |
# Create a unique namespace for the file
|
41 |
|
42 |
docsearch = Chroma.from_documents(
|
43 |
+
docs, Config.embeddings
|
44 |
)
|
45 |
return docsearch
|
46 |
|