updating the spaces log error
- app.py +19 -18
- auditqa/__pycache__/__init__.cpython-310.pyc +0 -0
- auditqa/__pycache__/process_chunks.cpython-310.pyc +0 -0
- auditqa/__pycache__/reader.cpython-310.pyc +0 -0
- auditqa/__pycache__/reports.cpython-310.pyc +0 -0
- auditqa/__pycache__/retriever.cpython-310.pyc +0 -0
- auditqa/__pycache__/sample_questions.cpython-310.pyc +0 -0
- auditqa/__pycache__/utils.cpython-310.pyc +0 -0
- auditqa/process_chunks.py +6 -4
- auditqa/reader.py +5 -0
- auditqa/retriever.py +1 -2
- auditqa/utils.py +1 -1
app.py
CHANGED
@@ -13,7 +13,6 @@ from auditqa.process_chunks import load_chunks, getconfig, get_local_qdrant
 from auditqa.retriever import get_context
 from auditqa.reader import nvidia_client, dedicated_endpoint
 from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template
-
 from dotenv import load_dotenv
 load_dotenv()
 
@@ -35,7 +34,7 @@ scheduler = CommitScheduler(
     path_in_repo="audit_chatbot",
     token=SPACES_LOG )
 
-
+#####--------------- VECTOR STORE -------------------------------------------------
 # reports contain the already created chunks from Markdown version of pdf reports
 # document processing was done using : https://github.com/axa-group/Parsr
 # We need to create the local vectorstore collection once using load_chunks
@@ -45,7 +44,7 @@ scheduler = CommitScheduler(
 # once the vectore embeddings are created we will use qdrant client to access these
 vectorstores = get_local_qdrant()
 
-
+#####---------------------CHAT-----------------------------------------------------
 def start_chat(query,history):
     history = history + [(query,None)]
     history = [tuple(x) for x in history]
@@ -59,13 +58,12 @@ async def chat(query,history,sources,reports,subtype,year):
     to yield a tuple of:(messages in gradio format/messages in langchain format, source documents)
     """
 
-
-
-
-
-
-
-    logging.info(f"year:{year}")
+    print(f">> NEW QUESTION : {query}")
+    print(f"history:{history}")
+    print(f"sources:{sources}")
+    print(f"reports:{reports}")
+    print(f"subtype:{subtype}")
+    print(f"year:{year}")
     docs_html = ""
     output_query = ""
 
@@ -78,9 +76,11 @@ async def chat(query,history,sources,reports,subtype,year):
     context_retrieved_formatted = "||".join(doc.page_content for doc in context_retrieved)
     context_retrieved_lst = [doc.page_content for doc in context_retrieved]
 
-    ##------------------- -------------Prompt
+    ##------------------- -------------Define Prompt-------------------------------------------
     SYSTEM_PROMPT = """
-    You are AuditQ&A, an AI Assistant created by Auditors and Data Scientist.
+    You are AuditQ&A, an AI Assistant created by Auditors and Data Scientist. \
+    You are given a question and extracted passages of the consolidated/departmental/thematic focus audit reports.\
+    Provide a clear and structured answer based on the passages/context provided and the guidelines.
     Guidelines:
     - Passeges are provided as comma separated list of strings
     - If the passages have useful facts or numbers, use them in your answer.
@@ -114,9 +114,9 @@ async def chat(query,history,sources,reports,subtype,year):
     if model_config.get('reader','TYPE') == 'NVIDIA':
         chat_model = nvidia_client()
         async def process_stream():
-            nonlocal answer_yet
-            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
-            #
+            nonlocal answer_yet # Use the outer scope's answer_yet variable
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
+            # instead of modifying the one from the outer scope.
             # Iterate over the streaming response chunks
             response = chat_model.chat_completion(
                 model=model_config.get("reader","NVIDIA_MODEL"),
@@ -139,7 +139,8 @@ async def chat(query,history,sources,reports,subtype,year):
     else:
         chat_model = dedicated_endpoint()
         async def process_stream():
-            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
+            # instead of modifying the one from the outer scope.
             nonlocal answer_yet # Use the outer scope's answer_yet variable
             # Iterate over the streaming response chunks
             async for chunk in chat_model.astream(messages):
@@ -171,14 +172,14 @@ async def chat(query,history,sources,reports,subtype,year):
                 "answer": history[-1][1],
                 "time": timestamp,
             }
-            save_logs(logs)
+            save_logs(scheduler,JSON_DATASET_PATH,logs)
         except Exception as e:
             logging.error(e)
 
 
 
 
-
+#####-------------------------- Gradio App--------------------------------------####
 
 # Set up Gradio Theme
 theme = gr.themes.Base(
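The comments added in both `process_stream()` branches explain why `nonlocal answer_yet` is needed. A minimal, runnable sketch of that scoping rule, using hypothetical names rather than the app's actual streaming code:

import asyncio

async def outer():
    answer_yet = ""  # lives in the enclosing scope, like answer_yet in chat()

    async def process_stream():
        # Without nonlocal, the += below would raise UnboundLocalError,
        # because Python would treat answer_yet as a new local variable.
        nonlocal answer_yet
        for token in ["Audit", " findings", " summarised."]:
            answer_yet += token
            await asyncio.sleep(0)  # hand control back, as a real streaming loop would

    await process_stream()
    print(answer_yet)  # -> Audit findings summarised.

asyncio.run(outer())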
auditqa/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/auditqa/__pycache__/__init__.cpython-310.pyc and b/auditqa/__pycache__/__init__.cpython-310.pyc differ

auditqa/__pycache__/process_chunks.cpython-310.pyc
CHANGED
Binary files a/auditqa/__pycache__/process_chunks.cpython-310.pyc and b/auditqa/__pycache__/process_chunks.cpython-310.pyc differ

auditqa/__pycache__/reader.cpython-310.pyc
ADDED
Binary file (1.3 kB).

auditqa/__pycache__/reports.cpython-310.pyc
CHANGED
Binary files a/auditqa/__pycache__/reports.cpython-310.pyc and b/auditqa/__pycache__/reports.cpython-310.pyc differ

auditqa/__pycache__/retriever.cpython-310.pyc
ADDED
Binary file (1.88 kB).

auditqa/__pycache__/sample_questions.cpython-310.pyc
CHANGED
Binary files a/auditqa/__pycache__/sample_questions.cpython-310.pyc and b/auditqa/__pycache__/sample_questions.cpython-310.pyc differ

auditqa/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (2.71 kB).
auditqa/process_chunks.py
CHANGED
@@ -16,9 +16,13 @@ device = 'cuda' if cuda.is_available() else 'cpu'
 path_to_data = "./reports/"
 
 
-##---------------------
+##---------------------functions -------------------------------------------##
 def getconfig(configfile_path:str):
     """
+    Read the config file
+
+    Params
+    ----------------
     configfile_path: file path of .cfg file
     """
 
@@ -117,9 +121,7 @@ def get_local_qdrant():
                 model_kwargs = {'device': device},
                 encode_kwargs = {'normalize_embeddings': True},
                 model_name=config.get('retriever','MODEL'))
-    #list_ = ['Consolidated','District','Ministry','allreports']
-    #for val in list_:
     client = QdrantClient(path="/data/local_qdrant")
-    print(client.get_collections())
+    print("Collections in local Qdrant:",client.get_collections())
     qdrant_collections['allreports'] = Qdrant(client=client, collection_name='allreports', embeddings=embeddings, )
     return qdrant_collections
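The hunk only adds a docstring to `getconfig`; the function body is not part of this diff. Judging from calls like `config.get('retriever','MODEL')` elsewhere in the commit, it is presumably a thin wrapper around `configparser`. A sketch under that assumption, not the repo's actual implementation:

import configparser

def getconfig(configfile_path: str):
    """
    Read the config file

    Params
    ----------------
    configfile_path: file path of .cfg file
    """
    config = configparser.ConfigParser()
    # ConfigParser.read() returns the list of files it managed to parse,
    # so an empty list means the path was wrong; fail fast in that case
    if not config.read(configfile_path):
        raise FileNotFoundError(f"config file not found: {configfile_path}")
    return config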
auditqa/reader.py
CHANGED
@@ -13,13 +13,17 @@ HF_token = os.environ["LLAMA_3_1"]
 
 
 def nvidia_client():
+    """ returns the nvidia server client """
     client = InferenceClient(
         base_url=model_config.get('reader','NVIDIA_ENDPOINT'),
         api_key=NVIDIA_SERVER)
+    print("getting nvidia client")
 
     return client
 
 def dedicated_endpoint():
+    """ returns the dedicated server endpoint"""
+
     # Set up the streaming callback handler
     callback = StreamingStdOutCallbackHandler()
 
@@ -36,4 +40,5 @@ def dedicated_endpoint():
 
     # Create a ChatHuggingFace instance with the streaming-enabled endpoint
     chat_model = ChatHuggingFace(llm=llm_qa)
+    print("getting dedicated endpoint wrapped in ChathuggingFace ")
     return chat_model
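`nvidia_client()` returns a `huggingface_hub.InferenceClient`, which `app.py` drives through `chat_completion(...)` in its NVIDIA branch. A standalone usage sketch; the endpoint URL, model name and token below are placeholders, not values from `model_params.cfg`:

from huggingface_hub import InferenceClient

client = InferenceClient(
    base_url="https://integrate.api.nvidia.com/v1",  # placeholder for NVIDIA_ENDPOINT
    api_key="YOUR_NVIDIA_API_KEY",                   # placeholder for NVIDIA_SERVER
)

# Streaming chat completion: each chunk carries an incremental delta of the answer
response = client.chat_completion(
    model="meta/llama-3.1-8b-instruct",              # placeholder for NVIDIA_MODEL
    messages=[{"role": "user", "content": "Summarise the audit findings."}],
    max_tokens=256,
    stream=True,
)
for chunk in response:
    print(chunk.choices[0].delta.content or "", end="")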
auditqa/retriever.py
CHANGED
@@ -3,14 +3,13 @@ from auditqa.process_chunks import getconfig
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import CrossEncoderReranker
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
-import logging
 
 model_config = getconfig("model_params.cfg")
 
 def create_filter(reports:list = [],sources:str =None,
                   subtype:str =None,year:str =None):
     if len(reports) == 0:
-        print("defining filter for:{}:{}:{}".format(sources,subtype,year))
+        print("defining filter for sources:{},subtype:{},year:{}".format(sources,subtype,year))
         filter=rest.Filter(
             must=[rest.FieldCondition(
                 key="metadata.source",
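`create_filter` builds a Qdrant payload filter when no specific reports are selected; the hunk shows only the `metadata.source` condition. A sketch of that pattern with `qdrant_client`'s `rest` models; the extra `subtype` and `year` conditions and the example values are assumptions for illustration:

from qdrant_client.http import models as rest

sources, subtype, year = "Ministry", "consolidated", "2022"  # example values
print("defining filter for sources:{},subtype:{},year:{}".format(sources, subtype, year))

filter = rest.Filter(
    must=[
        rest.FieldCondition(key="metadata.source",
                            match=rest.MatchValue(value=sources)),
        # assumed additional conditions; only metadata.source is visible in the hunk
        rest.FieldCondition(key="metadata.subtype",
                            match=rest.MatchValue(value=subtype)),
        rest.FieldCondition(key="metadata.year",
                            match=rest.MatchValue(value=year)),
    ]
)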
auditqa/utils.py
CHANGED
@@ -14,7 +14,7 @@ def save_logs(scheduler, JSON_DATASET_PATH, logs) -> None:
     with JSON_DATASET_PATH.open("a") as f:
         json.dump(logs, f)
         f.write("\n")
-
+    print("logging done")
 
 def get_message_template(type, SYSTEM_PROMPT, USER_PROMPT):
     if type == 'NVIDIA':