LohithGummi committed on
Commit 9f73436
1 Parent(s): 5cd7346

Upload 2 files

Files changed (2)
  1. app.py +147 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,147 @@
# Import the necessary libraries
import json
import uuid
import os
from pathlib import Path

from openai import OpenAI
import gradio as gr

from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings
)
from langchain_community.vectorstores import Chroma
from huggingface_hub import CommitScheduler

# Anyscale-hosted, OpenAI-compatible endpoint; the API key is read from the environment
client = OpenAI(
    base_url="https://api.endpoints.anyscale.com/v1",
    api_key=os.environ['ANYSCALE_API_KEY']
)

embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')

streamlit_collection = 'ai_collection'

# Load the persisted Chroma collection holding the embedded report chunks
vectorstore_persisted = Chroma(
    collection_name=streamlit_collection,
    persist_directory='./reports_db',
    embedding_function=embedding_model
)

# Retrieve the 5 most similar chunks for each query
retriever = vectorstore_persisted.as_retriever(
    search_type='similarity',
    search_kwargs={'k': 5}
)

# Prepare the logging functionality

log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# Periodically push the local log folder to a Hugging Face dataset repo (every 2 minutes)
scheduler = CommitScheduler(
    repo_id="reports-qna",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2
)
qna_system_message = """
You are an assistant to a Financial Analyst. Your task is to summarize and provide relevant information for the financial analyst's question based on the provided context.

User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context.
The context contains references to specific portions of documents relevant to the user's query, along with the page number from the report.
The source for the context will begin with the token ###Page

When crafting your response:
1. Select only context relevant to answer the question.
2. Include the source links in your response.
3. User questions will begin with the token: ###Question.
4. If the question is irrelevant or you do not have the information to answer it, respond with "Sorry, this is out of my knowledge base".

Please adhere to the following guidelines:
- Your response should only be about the question asked and nothing else.
- Answer only using the context provided.
- Do not mention anything about the context in your final answer.
- If the answer is not found in the context, it is very important that you respond with "Sorry, this is out of my knowledge base".
- Always quote the page number when you use the context. Cite the relevant page number at the end of your response under the section - Page:
- Do not make up sources. Use the links provided in the sources section of the context and nothing else. You are prohibited from providing other links/sources.

Here is an example of how to structure your response:

Answer:
[Answer]

Page:
[Page number]
"""

qna_user_message_template = """
###Context
Here are some documents and their page numbers that are relevant to the question mentioned below.
{context}

###Question
{question}
"""
# Define the predict function that runs when 'Submit' is clicked or when an API request is made
def predict(user_input, company):

    # Path of the selected company's 10-K report; note that, as written, it is not passed
    # to the retriever as a metadata filter, so retrieval runs over the whole collection
    report_path = "dataset/" + company + "-10-k-2023.pdf"
    relevant_document_chunks = retriever.invoke(user_input)
    context_list = [d.page_content + "\n ###Page: " + str(d.metadata['page']) + "\n\n " for d in relevant_document_chunks]
    context_for_query = ".".join(context_list)

    prompt = [
        {'role': 'system', 'content': qna_system_message},
        {'role': 'user', 'content': qna_user_message_template.format(
            context=context_for_query,
            question=user_input
            )
        }
    ]

    try:
        response = client.chat.completions.create(
            model='mistralai/Mixtral-8x7B-Instruct-v0.1',
            messages=prompt,
            temperature=0
        )

        prediction = response.choices[0].message.content

    except Exception as e:
        prediction = str(e)

    # Log both the inputs and outputs to a local log file
    # While writing to the log file, hold the commit scheduler's lock to avoid parallel access
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction
                }
            ))
            f.write("\n")

    return prediction
textbox = gr.Textbox(placeholder="Enter your query here", lines=6)
company = gr.Radio(choices=["google", "msft", "aws", "ibm", "meta"], label="Select an option")

# Create the interface
demo = gr.Interface(
    inputs=[textbox, company], fn=predict, outputs="text",
    title="10-K Reports Q&A System",
    description="This web app presents an interface to ask questions on 10-K reports",
    concurrency_limit=16
)

demo.queue()
demo.launch()
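Because gr.Interface also exposes predict as an API endpoint, the Space can be queried programmatically once deployed. Below is a minimal sketch using the gradio_client package, assuming a hypothetical Space id; gradio_client would be installed on the caller's side, not in this Space's requirements.

from gradio_client import Client

# Hypothetical Space id; replace with the actual owner/space-name once deployed
client = Client("LohithGummi/reports-qna")

result = client.predict(
    "What was total revenue in fiscal year 2023?",  # user_input textbox
    "google",                                       # company radio choice
    api_name="/predict"
)
print(result)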
requirements.txt ADDED
@@ -0,0 +1,5 @@
openai==1.23.2
chromadb==0.4.22
langchain==0.1.9
langchain-community==0.0.32
sentence-transformers==2.3.1