LohithGummi commited on
Commit
2bcfcf7
·
verified ·
1 Parent(s): d4579b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -142
app.py CHANGED
@@ -1,143 +1,159 @@
1
- # Import the necessary Libraries
2
- import json
3
- import uuid
4
- import os
5
-
6
- from openai import OpenAI
7
- import gradio as gr
8
-
9
- from langchain_community.embeddings.sentence_transformer import (
10
- SentenceTransformerEmbeddings
11
- )
12
- from langchain_community.vectorstores import Chroma
13
- from huggingface_hub import CommitScheduler
14
- from pathlib import Path
15
-
16
-
17
- client = OpenAI(
18
- base_url="https://api.endpoints.anyscale.com/v1",
19
- api_key=os.environ['ANYSCALE_API_KEY']
20
- )
21
-
22
- embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
23
-
24
- streamlit_collection = 'reports_collection'
25
-
26
- vectorstore_persisted = Chroma(
27
- collection_name=streamlit_collection,
28
- persist_directory='./reports_db',
29
- embedding_function=embedding_model
30
- )
31
-
32
- # Prepare the logging functionality
33
-
34
- log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
35
- log_folder = log_file.parent
36
-
37
- scheduler = CommitScheduler(
38
- repo_id="reports-qna",
39
- repo_type="dataset",
40
- folder_path=log_folder,
41
- path_in_repo="data",
42
- every=2
43
- )
44
-
45
- qna_system_message = """
46
- You are an assistant to a Financial Analyst. Your task is to summarize and provide relevant information to the financial analyst's question based on the provided context.
47
-
48
- User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context.
49
- The context contains references to specific portions of documents relevant to the user's query, along with page number from the report.
50
- The source for the context will begin with the token ###Page
51
-
52
- When crafting your response:
53
- 1. Select only context relevant to answer the question.
54
- 2. Include the source links in your response.
55
- 3. User questions will begin with the token: ###Question.
56
- 4. If the question is irrelevant or if you do not have the information to respond with - "Sorry, this is out of my knowledge base"
57
-
58
- Please adhere to the following guidelines:
59
- - Your response should only be about the question asked and nothing else.
60
- - Answer only using the context provided.
61
- - Do not mention anything about the context in your final answer.
62
- - If the answer is not found in the context, it is very very important for you to respond with "Sorry, this is out of my knowledge base"
63
- - Always quote the page number when you use the context. Cite the relevant page number at the end of your response under the section - Page:
64
- - Do not make up sources Use the links provided in the sources section of the context and nothing else. You are prohibited from providing other links/sources.
65
-
66
- Here is an example of how to structure your response:
67
-
68
- Answer:
69
- [Answer]
70
-
71
- Page:
72
- [Page number]
73
- """
74
-
75
- qna_user_message_template = """
76
- ###Context
77
- Here are some documents and their page number that are relevant to the question mentioned below.
78
- {context}
79
-
80
- ###Question
81
- {question}
82
- """
83
-
84
- # Define the predict function that runs when 'Submit' is clicked or when a API request is made
85
- def predict(user_input,company):
86
-
87
- filter = "dataset/"+company+"-10-k-2023.pdf"
88
- relevant_document_chunks = vectorstore_persisted.similarity_search(user_input, k=5, filter={"source":filter})
89
- context_list = [d.page_content + "\n ###Page: " + str(d.metadata['page']) + "\n\n " for d in relevant_document_chunks]
90
- context_for_query = ".".join(context_list)
91
-
92
- prompt = [
93
- {'role':'system', 'content': qna_system_message},
94
- {'role': 'user', 'content': qna_user_message_template.format(
95
- context=context_for_query,
96
- question=user_input
97
- )
98
- }
99
- ]
100
-
101
- try:
102
- response = client.chat.completions.create(
103
- model='mistralai/Mixtral-8x7B-Instruct-v0.1',
104
- messages=prompt,
105
- temperature=0
106
- )
107
-
108
- prediction = response.choices[0].message.content
109
-
110
- except Exception as e:
111
- prediction = e
112
-
113
- # While the prediction is made, log both the inputs and outputs to a local log file
114
- # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
115
- # access
116
-
117
- with scheduler.lock:
118
- with log_file.open("a") as f:
119
- f.write(json.dumps(
120
- {
121
- 'user_input': user_input,
122
- 'retrieved_context': context_for_query,
123
- 'model_response': prediction
124
- }
125
- ))
126
- f.write("\n")
127
-
128
- return prediction
129
-
130
-
131
- textbox = gr.Textbox(placeholder="Enter your query here", lines=6)
132
- company = gr.Radio(choices=["google", "msft", "aws", "ibm", "meta"], label="Select the company")
133
-
134
- # Create the interface
135
- demo = gr.Interface(
136
- inputs=[textbox, company], fn=predict, outputs="text",
137
- title="10-k Reports Q&A System",
138
- description="This web API presents an interface to ask questions on 10-k reports ",
139
- concurrency_limit=16
140
- )
141
-
142
- demo.queue()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  demo.launch()
 
1
+ # Import the necessary Libraries
2
+ import json
3
+ import uuid
4
+ import os
5
+
6
+ from openai import OpenAI
7
+ import gradio as gr
8
+
9
+ from langchain_community.embeddings.sentence_transformer import (
10
+ SentenceTransformerEmbeddings
11
+ )
12
+ from langchain_community.vectorstores import Chroma
13
+ from huggingface_hub import CommitScheduler
14
+ from pathlib import Path
import os

# --- Endpoint / credential configuration -----------------------------------
#
# SECURITY FIX: the OpenAI API key was previously hardcoded in this file
# ("gl-U2FsdGVkX1..."), which leaks the credential to anyone who can read the
# repository or the Space. The key must be supplied via the environment
# (e.g. an HF Spaces secret named OPENAI_API_KEY) and never committed to
# source control. The previously committed key should be revoked and rotated.
if "OPENAI_API_KEY" not in os.environ:
    # Warn loudly at startup instead of crashing: the UI can still load and
    # the error will surface on the first model call.
    print("WARNING: OPENAI_API_KEY is not set; OpenAI API calls will fail.")

# Point the OpenAI client at the Great Learning proxy unless the deployment
# already provides its own base URL (setdefault keeps any pre-set value).
os.environ.setdefault("OPENAI_BASE_URL", "https://aibe.mygreatlearning.com/openai/v1")
# Create Client
# The OpenAI client reads OPENAI_API_KEY / OPENAI_BASE_URL from the
# environment, so no arguments are needed here.
client = OpenAI()

# Chat model used by predict() below.
model_name = 'gpt-4o-mini'

# Embedding model must match the one used to build ./reports_db,
# otherwise similarity search returns garbage.
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')

streamlit_collection = 'reports_collection'

# Open the pre-built, persisted Chroma collection of 10-K report chunks.
vectorstore_persisted = Chroma(
    collection_name=streamlit_collection,
    persist_directory='./reports_db',
    embedding_function=embedding_model
)

# Prepare the logging functionality
# Each process gets its own uniquely named JSONL log file so concurrent
# replicas never write to the same file.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# Periodically (every 2 minutes) commits the logs folder to an HF dataset
# repo; writes below must hold scheduler.lock to avoid uploading a
# partially written file.
scheduler = CommitScheduler(
    repo_id="reports-qna",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2
)
60
+
# System prompt: defines the assistant persona, the ###Context / ###Page /
# ###Question tokens used by the user template below, the refusal phrase for
# out-of-context questions, and the required Answer/Page response layout.
qna_system_message = """
You are an assistant to a Financial Analyst. Your task is to summarize and provide relevant information to the financial analyst's question based on the provided context.

User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context.
The context contains references to specific portions of documents relevant to the user's query, along with page number from the report.
The source for the context will begin with the token ###Page

When crafting your response:
1. Select only context relevant to answer the question.
2. Include the source links in your response.
3. User questions will begin with the token: ###Question.
4. If the question is irrelevant or if you do not have the information to respond with - "Sorry, this is out of my knowledge base"

Please adhere to the following guidelines:
- Your response should only be about the question asked and nothing else.
- Answer only using the context provided.
- Do not mention anything about the context in your final answer.
- If the answer is not found in the context, it is very very important for you to respond with "Sorry, this is out of my knowledge base"
- Always quote the page number when you use the context. Cite the relevant page number at the end of your response under the section - Page:
- Do not make up sources Use the links provided in the sources section of the context and nothing else. You are prohibited from providing other links/sources.

Here is an example of how to structure your response:

Answer:
[Answer]

Page:
[Page number]
"""

# User prompt template: {context} receives the retrieved chunks (each tagged
# with its ###Page number) and {question} receives the raw user input.
qna_user_message_template = """
###Context
Here are some documents and their page number that are relevant to the question mentioned below.
{context}

###Question
{question}
"""
99
+
# Define the predict function that runs when 'Submit' is clicked or when a API request is made
def predict(user_input, company):
    """Answer an analyst's question against the selected company's 10-K report.

    Parameters
    ----------
    user_input : str
        The analyst's question.
    company : str
        One of the radio choices ("google", "msft", "aws", "ibm", "meta");
        used to restrict retrieval to that company's 10-K PDF.

    Returns
    -------
    str
        The model's answer, or an apology/error message string.
    """
    # Restrict retrieval to chunks sourced from this company's report.
    # (Renamed from `filter` to avoid shadowing the builtin.)
    source_filter = "dataset/" + company + "-10-k-2023.pdf"
    relevant_document_chunks = vectorstore_persisted.similarity_search(
        user_input, k=5, filter={"source": source_filter}
    )
    context_list = [
        d.page_content + "\n ###Page: " + str(d.metadata['page']) + "\n\n "
        for d in relevant_document_chunks
    ]
    context_for_query = ".".join(context_list)

    prompt = [
        {'role': 'system', 'content': qna_system_message},
        {'role': 'user', 'content': qna_user_message_template.format(
            context=context_for_query,
            question=user_input
        )}
    ]

    try:
        # BUG FIX: use the configured `model_name` (gpt-4o-mini) rather than
        # the stale hardcoded Anyscale model
        # 'mistralai/Mixtral-8x7B-Instruct-v0.1', which the new endpoint
        # does not serve.
        response = client.chat.completions.create(
            model=model_name,
            messages=prompt,
            temperature=0
        )
        prediction = response.choices[0].message.content
    except Exception as e:
        # BUG FIX: the raw exception object was previously stored, which is
        # not JSON-serializable (crashing the logging below) and returned a
        # non-string to Gradio. Keep a plain string instead.
        prediction = f"Sorry, I could not process the request: {e}"

    # While the prediction is made, log both the inputs and outputs to a
    # local log file. Hold the commit-scheduler lock so a background commit
    # never uploads a partially written line.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction
                }
            ))
            f.write("\n")

    return prediction
# Input widgets: free-text query plus a radio button selecting which
# company's 10-K report to search.
textbox = gr.Textbox(placeholder="Enter your query here", lines=6)
company = gr.Radio(choices=["google", "msft", "aws", "ibm", "meta"], label="Select the company")

# Create the interface
demo = gr.Interface(
    inputs=[textbox, company], fn=predict, outputs="text",
    title="10-k Reports Q&A System",
    description="This web API presents an interface to ask questions on 10-k reports ",
    concurrency_limit=16
)

# Enable request queuing (required for the concurrency_limit above), then
# start the server.
demo.queue()
demo.launch()