singhjagpreet committed
Commit 1cb46fc
1 Parent(s): 99a3f34

file loading completed

Files changed (5):
  1. app.py +27 -11
  2. requirements.txt +1 -1
  3. src/config.py +1 -1
  4. src/model.py +7 -5
  5. src/utils.py +6 -6
app.py CHANGED
@@ -8,17 +8,11 @@ from src.utils import get_docSearch, get_source
 from src.model import load_chain
 
 
-
-
-
-
-
-
 welcome_message = """ Upload your file here"""
 
 @cl.on_chat_start
 async def start():
-    await cl.Message("you are in ").send()
+    await cl.Message(content="you are in ").send()
     logging.info(f"app started")
     files = None
     while files is None:
@@ -30,7 +24,7 @@ async def start():
         ).send()
     logging.info("uploader excecuted")
     file = files[0]
-    msg = cl.Message(content=f"Processing `{type(files)}` {file.name}....")
+    msg = cl.Message(content=f"Processing {file.name}....")
     await msg.send()
 
     logging.info("processing started")
@@ -47,22 +41,38 @@ async def start():
     ## let the user know when system is ready
 
     msg.content = f"{file.name} processed. You begin asking questions"
-
     await msg.update()
 
     logging.info("processing completed")
 
     cl.user_session.set("chain", chain)
 
+    logging.info("chain saved for active session")
+
 @cl.on_message
 async def main(message):
+
+
     chain = cl.user_session.get("chain")
+
+    logging.info(f"retrived chain for QA {type(chain)}")
     cb = cl.AsyncLangchainCallbackHandler(
-        stream_final_answer=True, answer_prefix_tokens=["FINAL","ANSWER"]
+        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
     )
+
+    logging.info("define call backs")
+
 
     cb.answer_reached = True
+    logging.info("answer reached")
+
     res = await chain.acall(message, callbacks=[cb])
+    logging.info("define res")
+
+
+    logging.info("call backs ")
+
+
 
     answer = res["answer"]
     sources = res["sources"].strip()
@@ -73,11 +83,17 @@ async def main(message):
     metadatas = [doc.metadata for doc in docs]
     all_sources = [m["source"]for m in metadatas]
 
-    source_elements,answer = get_source(sources,all_sources,docs,cl)
+
+
+    source_elements = get_source(sources,all_sources,docs,cl)
+
+    logging.info("getting source")
 
     if cb.has_streamed_final_answer:
         cb.final_stream.elements = source_elements
         await cb.final_stream.update()
+        logging.info("call back triggred")
     else:
         await cl.Message(content=answer, elements=source_elements).send()
+        logging.info("post message")
 
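The updated call site in main unpacks a single return value from get_source, where the earlier code also unpacked an answer. The helper itself lives in src/utils.py and is not part of this diff; below is a minimal sketch, assuming the common Chainlit pattern of building one cl.Text element per cited source, of what a function with that signature could return. The matching logic here is an assumption, not the committed implementation.

def get_source(sources, all_sources, docs, cl):
    # Hypothetical sketch; the committed implementation in src/utils.py may differ.
    # sources: comma-separated source string from the chain result
    # all_sources: source names taken from the retrieved documents' metadata
    # docs: the retrieved documents; cl: the chainlit module passed in by app.py
    source_elements = []
    for source in sources.split(","):
        name = source.strip()
        if name not in all_sources:
            continue  # skip citations that do not match a known document
        idx = all_sources.index(name)
        source_elements.append(cl.Text(content=docs[idx].page_content, name=name))
    return source_elements  # single return value, matching the updated call site
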
requirements.txt CHANGED
@@ -4,4 +4,4 @@ python-dotenv
 chainlit
 chromadb
 tiktoken
-tokenizers
+tokenizers
src/config.py CHANGED
@@ -9,5 +9,5 @@ class Config:
     streaming = True
     chain_type = "stuff"
     max_token_limit = 4098
-    embeddings = OpenAIEmbeddings(api_key=os.getenv('OPENAI_API_KEY'))
+    embeddings = OpenAIEmbeddings()
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
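
Dropping the explicit api_key argument relies on OpenAIEmbeddings resolving the key from the OPENAI_API_KEY environment variable, which load_dotenv() (called in src/utils.py) populates from a .env file. A minimal sketch of that behaviour, assuming a .env file containing OPENAI_API_KEY is present:

from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings

load_dotenv()                    # loads OPENAI_API_KEY from .env into the process environment
embeddings = OpenAIEmbeddings()  # no api_key= needed; the key is read from the environment
vector = embeddings.embed_query("hello world")  # returns a list of floats
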
src/model.py CHANGED
@@ -1,4 +1,4 @@
-from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
 from langchain.chat_models import ChatOpenAI
 import logging
 import os
@@ -12,13 +12,15 @@ from src.config import Config
 
 def load_model():
     model = ChatOpenAI(temperature=Config.temperature,
-                       streaming=Config.streaming,api_key=os.getenv('OPENAI_API_KEY'))
+                       streaming=Config.streaming)
     return model
 
 
 def load_chain(docsearch):
     model = load_model()
-    chain = RetrievalQAWithSourcesChain.from_chain_type(model,
-                                                        chain_type=Config.chain_type,
-                                                        retriever=docsearch.as_retriever(max_tokens_limit=Config.max_token_limit))
+    chain = RetrievalQAWithSourcesChain.from_chain_type(
+        ChatOpenAI(temperature=0, streaming=True),
+        chain_type="stuff",
+        retriever=docsearch.as_retriever(max_tokens_limit=4097),
+    )
     return chain
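
For context, app.py queries the chain built here with chain.acall(message, callbacks=[cb]) and reads res["answer"] and res["sources"]. A minimal synchronous usage sketch, where docs is a placeholder for the documents produced by src/utils.py and is not part of this commit:

from langchain.vectorstores import Chroma

from src.config import Config
from src.model import load_chain

docsearch = Chroma.from_documents(docs, Config.embeddings)  # docs: prepared LangChain Documents (placeholder)
chain = load_chain(docsearch)

res = chain({"question": "What is this document about?"})
print(res["answer"])   # generated answer
print(res["sources"])  # comma-separated source names
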
src/utils.py CHANGED
@@ -1,15 +1,13 @@
 from chainlit.types import AskFileResponse
 import click
 from langchain.document_loaders import TextLoader
-from langchain.document_loaders import PyPDFDirectoryLoader
+from langchain.document_loaders import PyPDFLoader
 from langchain.vectorstores import Chroma
 
 
 from src.config import Config
-# import chainlit as cl
 import logging
-import openai
-import os
+
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -23,7 +21,7 @@ def process_file(file: AskFileResponse):
     if file.type == "text/plain":
         Loader = TextLoader
     elif file.type == "application/pdf":
-        Loader = PyPDFDirectoryLoader
+        Loader = PyPDFLoader
 
     with tempfile.NamedTemporaryFile() as tempfile:
         tempfile.write(file.content)
@@ -48,7 +46,9 @@ def get_docSearch(file,cl):
 
     docsearch = Chroma.from_documents(docs, Config.embeddings)
 
-    logging.info("embedding completed")
+    logging.info(f"embedding completed {type(Config.embeddings)}")
+
+    logging.info(f"type of docsearch {type(docsearch)}")
 
     return docsearch
 
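
PyPDFLoader takes the path of a single PDF file, whereas PyPDFDirectoryLoader expects a directory of PDFs, so the new loader matches the temporary file that process_file writes the upload to. A minimal sketch of that pattern; the helper name load_upload is hypothetical and only illustrates the tempfile-plus-loader flow:

import tempfile

from langchain.document_loaders import PyPDFLoader, TextLoader


def load_upload(content: bytes, mime_type: str):
    # Hypothetical helper mirroring process_file: write the uploaded bytes to a
    # temporary file, then hand its path to the matching document loader.
    if mime_type == "text/plain":
        Loader, suffix = TextLoader, ".txt"
    else:
        Loader, suffix = PyPDFLoader, ".pdf"
    with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
        tmp.write(content)
        tmp.flush()
        docs = Loader(tmp.name).load()  # for PDFs, load() yields one Document per page
    return docs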