pratham0011 committed
Commit 3384f88 · verified · 1 Parent(s): 90f101b

docker file and formatting

Files changed (6)
  1. Dockerfile +32 -0
  2. app/__init__.py +0 -0
  3. app/app.py +62 -0
  4. app/streamlit_app.py +50 -0
  5. main.py +5 -120
  6. requirements.txt +11 -11
Dockerfile ADDED
@@ -0,0 +1,32 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.12
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Copy the current directory contents into the container at /app
+ COPY . /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     curl \
+     software-properties-common \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Make port 7860 available to the world outside this container
+ EXPOSE 7860
+
+ # Create a script to run both FastAPI and Streamlit
+ RUN echo '#!/bin/bash\n\
+ uvicorn main:app --host 0.0.0.0 --port 8000 &\n\
+ streamlit run app/streamlit_app.py --server.port 7860 --server.address 0.0.0.0\n\
+ ' > /app/run.sh
+
+ RUN chmod +x /app/run.sh
+
+ # Run the script when the container launches
+ CMD ["/app/run.sh"]
app/__init__.py ADDED
File without changes
app/app.py ADDED
@@ -0,0 +1,62 @@
+ from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
+ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from llama_index.core import Settings
+ import os
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+
+ # Configure the LlamaIndex settings
+ Settings.llm = HuggingFaceInferenceAPI(
+     model_name="meta-llama/Meta-Llama-3-8B-Instruct",
+     tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
+     context_window=3900,
+     token=os.getenv("HF_TOKEN"),
+     max_new_tokens=1000,
+     generate_kwargs={"temperature": 0.5},
+ )
+ Settings.embed_model = HuggingFaceEmbedding(
+     model_name="BAAI/bge-small-en-v1.5"
+ )
+
+ # Define the directories for persistent storage and data
+ PERSIST_DIR = "./db"
+ DATA_DIR = "data"
+
+ # Ensure the data and storage directories exist
+ os.makedirs(DATA_DIR, exist_ok=True)
+ os.makedirs(PERSIST_DIR, exist_ok=True)
+
+ def data_ingestion():
+     documents = SimpleDirectoryReader(DATA_DIR).load_data()
+     storage_context = StorageContext.from_defaults()
+     index = VectorStoreIndex.from_documents(documents)
+     index.storage_context.persist(persist_dir=PERSIST_DIR)
+
+ def handle_query(query):
+     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+     index = load_index_from_storage(storage_context)
+     chat_text_qa_msgs = [
+         (
+             "user",
+             """You are a Q&A assistant named CHAT-DOC. Your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
+             Context:
+             {context_str}
+             Question:
+             {query_str}
+             """
+         )
+     ]
+     text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+     query_engine = index.as_query_engine(text_qa_template=text_qa_template)
+     answer = query_engine.query(query)
+
+     if hasattr(answer, 'response'):
+         return answer.response
+     elif isinstance(answer, dict) and 'response' in answer:
+         return answer['response']
+     else:
+         return "Sorry, I couldn't find an answer."
app/streamlit_app.py ADDED
@@ -0,0 +1,50 @@
+ import streamlit as st
+ import requests
+
+
+ # Streamlit UI
+ def streamlit_ui():
+     st.title("Chat with your Document 📄")
+     st.markdown("Chat here👇")
+
+     icons = {"assistant": "🤖", "user": "👤"}
+
+     if 'messages' not in st.session_state:
+         st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF, DOCX, or TXT file and ask me anything about its content.'}]
+
+     for message in st.session_state.messages:
+         with st.chat_message(message['role'], avatar=icons[message['role']]):
+             st.write(message['content'])
+
+     with st.sidebar:
+         st.title("Menu:")
+         uploaded_file = st.file_uploader("Upload your document (PDF, DOCX, TXT)", type=["pdf", "docx", "txt"])
+         if st.button("Submit & Process") and uploaded_file:
+             with st.spinner("Processing..."):
+                 files = {"file": (uploaded_file.name, uploaded_file.getvalue(), uploaded_file.type)}
+                 response = requests.post("http://localhost:8000/upload", files=files)
+                 if response.status_code == 200:
+                     st.success("File uploaded and processed successfully")
+                 else:
+                     st.error("Error uploading file")
+
+     user_prompt = st.chat_input("Ask me anything about the content of the document:")
+
+     if user_prompt:
+         st.session_state.messages.append({'role': 'user', "content": user_prompt})
+         with st.chat_message("user", avatar=icons["user"]):
+             st.write(user_prompt)
+
+         # Trigger assistant's response retrieval and update UI
+         with st.spinner("Thinking..."):
+             response = requests.post("http://localhost:8000/query", json={"question": user_prompt})
+             if response.status_code == 200:
+                 assistant_response = response.json()["response"]
+                 with st.chat_message("assistant", avatar=icons["assistant"]):
+                     st.write(assistant_response)
+                 st.session_state.messages.append({'role': 'assistant', "content": assistant_response})
+             else:
+                 st.error("Error querying document")
+
+ if __name__ == "__main__":
+     streamlit_ui()
main.py CHANGED
@@ -1,79 +1,16 @@
- # File: main.py
- from fastapi import FastAPI, UploadFile, File, HTTPException
- from pydantic import BaseModel
- from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
- from llama_index.llms.huggingface import HuggingFaceInferenceAPI
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
- from llama_index.core import Settings
  import os
- from dotenv import load_dotenv
  import shutil
  import uvicorn
- import streamlit as st
- import requests
- import base64
- import docx2txt
- import threading
-
- # Load environment variables
- load_dotenv()
-
- app = FastAPI()
-
- # Configure the Llama index settings
- Settings.llm = HuggingFaceInferenceAPI(
-     model_name="meta-llama/Meta-Llama-3-8B-Instruct",
-     tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
-     context_window=3900,
-     token=os.getenv("HF_TOKEN"),
-     max_new_tokens=1000,
-     generate_kwargs={"temperature": 0.5},
- )
- Settings.embed_model = HuggingFaceEmbedding(
-     model_name="BAAI/bge-small-en-v1.5"
- )

- # Define the directory for persistent storage and data
- PERSIST_DIR = "./db"
- DATA_DIR = "data"
+ from pydantic import BaseModel
+ from fastapi import FastAPI, File, UploadFile, HTTPException

- # Ensure data directory exists
- os.makedirs(DATA_DIR, exist_ok=True)
- os.makedirs(PERSIST_DIR, exist_ok=True)
+ from app.app import data_ingestion, handle_query, DATA_DIR

  class Query(BaseModel):
      question: str

- def data_ingestion():
-     documents = SimpleDirectoryReader(DATA_DIR).load_data()
-     storage_context = StorageContext.from_defaults()
-     index = VectorStoreIndex.from_documents(documents)
-     index.storage_context.persist(persist_dir=PERSIST_DIR)
-
- def handle_query(query):
-     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-     index = load_index_from_storage(storage_context)
-     chat_text_qa_msgs = [
-         (
-             "user",
-             """You are Q&A assistant named CHAT-DOC. Your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
-             Context:
-             {context_str}
-             Question:
-             {query_str}
-             """
-         )
-     ]
-     text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
-     query_engine = index.as_query_engine(text_qa_template=text_qa_template)
-     answer = query_engine.query(query)
-
-     if hasattr(answer, 'response'):
-         return answer.response
-     elif isinstance(answer, dict) and 'response' in answer:
-         return answer['response']
-     else:
-         return "Sorry, I couldn't find an answer."
+ app = FastAPI()

  @app.post("/upload")
  async def upload_file(file: UploadFile = File(...)):
@@ -96,57 +33,5 @@ async def query_document(query: Query):
      response = handle_query(query.question)
      return {"response": response}

- # Streamlit UI
- def streamlit_ui():
-     st.title("Chat with your Document 📄")
-     st.markdown("Chat here👇")
-
-     icons = {"assistant": "🤖", "user": "👤"}
-
-     if 'messages' not in st.session_state:
-         st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF, DOCX, or TXT file and ask me anything about its content.'}]
-
-     for message in st.session_state.messages:
-         with st.chat_message(message['role'], avatar=icons[message['role']]):
-             st.write(message['content'])
-
-     with st.sidebar:
-         st.title("Menu:")
-         uploaded_file = st.file_uploader("Upload your document (PDF, DOCX, TXT)", type=["pdf", "docx", "txt"])
-         if st.button("Submit & Process") and uploaded_file:
-             with st.spinner("Processing..."):
-                 files = {"file": (uploaded_file.name, uploaded_file.getvalue(), uploaded_file.type)}
-                 response = requests.post("http://localhost:8000/upload", files=files)
-                 if response.status_code == 200:
-                     st.success("File uploaded and processed successfully")
-                 else:
-                     st.error("Error uploading file")
-
-     user_prompt = st.chat_input("Ask me anything about the content of the document:")
-
-     if user_prompt:
-         st.session_state.messages.append({'role': 'user', "content": user_prompt})
-         with st.chat_message("user", avatar=icons["user"]):
-             st.write(user_prompt)
-
-         # Trigger assistant's response retrieval and update UI
-         with st.spinner("Thinking..."):
-             response = requests.post("http://localhost:8000/query", json={"question": user_prompt})
-             if response.status_code == 200:
-                 assistant_response = response.json()["response"]
-                 with st.chat_message("assistant", avatar=icons["assistant"]):
-                     st.write(assistant_response)
-                 st.session_state.messages.append({'role': 'assistant', "content": assistant_response})
-             else:
-                 st.error("Error querying document")
-
- def run_fastapi():
-     uvicorn.run(app, host="0.0.0.0", port=8000)
-
  if __name__ == "__main__":
-     # Start FastAPI in a separate thread
-     fastapi_thread = threading.Thread(target=run_fastapi)
-     fastapi_thread.start()
-
-     # Run Streamlit (this will run in the main thread)
-     streamlit_ui()
+     uvicorn.run(app, host="0.0.0.0", port=8000)
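main.py is now just the FastAPI service, with ingestion and querying delegated to app/app.py. A minimal sketch of exercising the two endpoints directly, mirroring the calls app/streamlit_app.py makes (the address is the default from this file; sample.pdf is a hypothetical test file):

# Hypothetical end-to-end check against a locally running instance
# (python main.py); not part of the commit.
import requests

with open("sample.pdf", "rb") as f:  # placeholder document
    r = requests.post(
        "http://localhost:8000/upload",
        files={"file": ("sample.pdf", f, "application/pdf")},
    )
print(r.status_code)

r = requests.post(
    "http://localhost:8000/query",
    json={"question": "Summarize the document."},
)
print(r.json()["response"])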
requirements.txt CHANGED
@@ -1,11 +1,11 @@
- streamlit
- python-dotenv
- llama-index
- llama-index-embeddings-huggingface
- llama-index-llms-huggingface
- gradio==3.50
- docx2txt
- tf-keras
- fastapi
- pydantic
- uvicorn
+ streamlit
+ python-dotenv
+ llama-index
+ llama-index-embeddings-huggingface
+ llama-index-llms-huggingface
+ gradio==3.50
+ docx2txt
+ tf-keras
+ fastapi
+ pydantic
+ uvicorn
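Every line is removed and re-added unchanged, so this hunk is whitespace-only (likely line endings), consistent with the "formatting" half of the commit message. A quick sanity check that the key dependencies resolve after installation (the module list is assumed from this file):

# Hypothetical import check; run after `pip install -r requirements.txt`.
import importlib

for mod in ("streamlit", "fastapi", "uvicorn", "pydantic", "llama_index", "dotenv", "docx2txt"):
    importlib.import_module(mod)
    print(f"{mod}: OK")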