singhjagpreet committed
Commit 1cb46fc
1 Parent(s): 99a3f34

file loading completed

Files changed (5):
  1. app.py +27 -11
  2. requirements.txt +1 -1
  3. src/config.py +1 -1
  4. src/model.py +7 -5
  5. src/utils.py +6 -6
app.py CHANGED
@@ -8,17 +8,11 @@ from src.utils import get_docSearch, get_source
 from src.model import load_chain
 
 
-
-
-
-
-
-
 welcome_message = """ Upload your file here"""
 
 @cl.on_chat_start
 async def start():
-    await cl.Message("you are in ").send()
+    await cl.Message(content="you are in ").send()
     logging.info(f"app started")
     files = None
     while files is None:
@@ -30,7 +24,7 @@ async def start():
         ).send()
     logging.info("uploader excecuted")
     file = files[0]
-    msg = cl.Message(content=f"Processing `{type(files)}` {file.name}....")
+    msg = cl.Message(content=f"Processing {file.name}....")
     await msg.send()
 
     logging.info("processing started")
@@ -47,22 +41,38 @@ async def start():
     ## let the user know when system is ready
 
     msg.content = f"{file.name} processed. You begin asking questions"
-
     await msg.update()
 
     logging.info("processing completed")
 
     cl.user_session.set("chain", chain)
 
+    logging.info("chain saved for active session")
+
 @cl.on_message
 async def main(message):
+
+
     chain = cl.user_session.get("chain")
+
+    logging.info(f"retrived chain for QA {type(chain)}")
     cb = cl.AsyncLangchainCallbackHandler(
-        stream_final_answer=True, answer_prefix_tokens=["FINAL","ANSWER"]
+        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
     )
+
+    logging.info("define call backs")
+
 
     cb.answer_reached = True
+    logging.info("answer reached")
+
     res = await chain.acall(message, callbacks=[cb])
+    logging.info("define res")
+
+
+    logging.info("call backs ")
+
+
 
     answer = res["answer"]
     sources = res["sources"].strip()
@@ -73,11 +83,17 @@ async def main(message):
     metadatas = [doc.metadata for doc in docs]
     all_sources = [m["source"]for m in metadatas]
 
-    source_elements,answer = get_source(sources,all_sources,docs,cl)
+
+
+    source_elements = get_source(sources,all_sources,docs,cl)
+
+    logging.info("getting source")
 
     if cb.has_streamed_final_answer:
         cb.final_stream.elements = source_elements
         await cb.final_stream.update()
+        logging.info("call back triggred")
     else:
         await cl.Message(content=answer, elements=source_elements).send()
+        logging.info("post message")
 
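The updated call site in main unpacks a single return value from get_source, where the earlier code also unpacked an answer. The helper itself lives in src/utils.py and is not part of this diff; below is a minimal sketch, assuming the common Chainlit pattern of building one cl.Text element per cited source, of what a function with that signature could return. The matching logic here is an assumption, not the committed implementation.

def get_source(sources, all_sources, docs, cl):
    # Hypothetical sketch; the committed implementation in src/utils.py may differ.
    # sources: comma-separated source string from the chain result
    # all_sources: source names taken from the retrieved documents' metadata
    # docs: the retrieved documents; cl: the chainlit module passed in by app.py
    source_elements = []
    for source in sources.split(","):
        name = source.strip()
        if name not in all_sources:
            continue  # skip citations that do not match a known document
        idx = all_sources.index(name)
        source_elements.append(cl.Text(content=docs[idx].page_content, name=name))
    return source_elements  # single return value, matching the updated call site
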
requirements.txt CHANGED
@@ -4,4 +4,4 @@ python-dotenv
 chainlit
 chromadb
 tiktoken
-tokenizers
+tokenizers
src/config.py CHANGED
@@ -9,5 +9,5 @@ class Config:
     streaming = True
     chain_type = "stuff"
     max_token_limit = 4098
-    embeddings = OpenAIEmbeddings(api_key=os.getenv('OPENAI_API_KEY'))
+    embeddings = OpenAIEmbeddings()
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
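
Dropping the explicit api_key argument relies on OpenAIEmbeddings resolving the key from the OPENAI_API_KEY environment variable, which load_dotenv() (called in src/utils.py) populates from a .env file. A minimal sketch of that behaviour, assuming a .env file containing OPENAI_API_KEY is present:

from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings

load_dotenv()                    # loads OPENAI_API_KEY from .env into the process environment
embeddings = OpenAIEmbeddings()  # no api_key= needed; the key is read from the environment
vector = embeddings.embed_query("hello world")  # returns a list of floats
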
src/model.py CHANGED
@@ -1,4 +1,4 @@
-from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
 from langchain.chat_models import ChatOpenAI
 import logging
 import os
@@ -12,13 +12,15 @@ from src.config import Config
 
 def load_model():
     model = ChatOpenAI(temperature=Config.temperature,
-                       streaming=Config.streaming,api_key=os.getenv('OPENAI_API_KEY'))
+                       streaming=Config.streaming)
     return model
 
 
 def load_chain(docsearch):
     model = load_model()
-    chain = RetrievalQAWithSourcesChain.from_chain_type(model,
-                                                        chain_type=Config.chain_type,
-                                                        retriever=docsearch.as_retriever(max_tokens_limit=Config.max_token_limit))
+    chain = RetrievalQAWithSourcesChain.from_chain_type(
+        ChatOpenAI(temperature=0, streaming=True),
+        chain_type="stuff",
+        retriever=docsearch.as_retriever(max_tokens_limit=4097),
+    )
     return chain
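
For context, app.py queries the chain built here with chain.acall(message, callbacks=[cb]) and reads res["answer"] and res["sources"]. A minimal synchronous usage sketch, where docs is a placeholder for the documents produced by src/utils.py and is not part of this commit:

from langchain.vectorstores import Chroma

from src.config import Config
from src.model import load_chain

docsearch = Chroma.from_documents(docs, Config.embeddings)  # docs: prepared LangChain Documents (placeholder)
chain = load_chain(docsearch)

res = chain({"question": "What is this document about?"})
print(res["answer"])   # generated answer
print(res["sources"])  # comma-separated source names
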
src/utils.py CHANGED
@@ -1,15 +1,13 @@
 from chainlit.types import AskFileResponse
 import click
 from langchain.document_loaders import TextLoader
-from langchain.document_loaders import PyPDFDirectoryLoader
+from langchain.document_loaders import PyPDFLoader
 from langchain.vectorstores import Chroma
 
 
 from src.config import Config
-# import chainlit as cl
 import logging
-import openai
-import os
+
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -23,7 +21,7 @@ def process_file(file: AskFileResponse):
     if file.type == "text/plain":
         Loader = TextLoader
     elif file.type == "application/pdf":
-        Loader = PyPDFDirectoryLoader
+        Loader = PyPDFLoader
 
     with tempfile.NamedTemporaryFile() as tempfile:
         tempfile.write(file.content)
@@ -48,7 +46,9 @@ def get_docSearch(file,cl):
 
     docsearch = Chroma.from_documents(docs, Config.embeddings)
 
-    logging.info("embedding completed")
+    logging.info(f"embedding completed {type(Config.embeddings)}")
+
+    logging.info(f"type of docsearch {type(docsearch)}")
 
     return docsearch
 
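
PyPDFLoader takes the path of a single PDF file, whereas PyPDFDirectoryLoader expects a directory of PDFs, so the new loader matches the temporary file that process_file writes the upload to. A minimal sketch of that pattern; the helper name load_upload is hypothetical and only illustrates the tempfile-plus-loader flow:

import tempfile

from langchain.document_loaders import PyPDFLoader, TextLoader


def load_upload(content: bytes, mime_type: str):
    # Hypothetical helper mirroring process_file: write the uploaded bytes to a
    # temporary file, then hand its path to the matching document loader.
    if mime_type == "text/plain":
        Loader, suffix = TextLoader, ".txt"
    else:
        Loader, suffix = PyPDFLoader, ".pdf"
    with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
        tmp.write(content)
        tmp.flush()
        docs = Loader(tmp.name).load()  # for PDFs, load() yields one Document per page
    return docs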