Spaces:
Running
Running
second commit
Browse files- EarningsTranscripts (PDF)/AAPL/Apple (AAPL) Q2 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) CEO Tim Cook on Q1 2022 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) CEO Tim Cook on Q2 2022 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) Q3 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) Q4 2022 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) Q4 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/AAPL/Apple, Inc. (AAPL) CEO Tim Cook on Q3 2022 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/AAPL/Apple, Inc. (AAPL) Q1 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) CEO Sundar Pichai on Q2 2022 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q1 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q2 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q3 2022 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q3 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q4 2022 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/GOOG/Alphabet Inc.'s (GOOG) CEO Sundar Pichai on Q1 2022 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) CEO Satya Nadella on Q1 Fiscal 2022 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) CEO Satya Nadella on Q4 2022 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q1 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q1 2024 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q2 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q3 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q4 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/MSFT/Microsoft Corporation's (MSFT) CEO Satya Nadella on Q2 2022 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/MSFT/Microsoft's (MSFT) CEO Satya Nadella on Q3 2022 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/NVDA/NVIDIA Corp. (NVDA) Q1 2024 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/NVDA/NVIDIA Corp. (NVDA) Q2 2024 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/NVDA/NVIDIA Corp. (NVDA) Q4 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/NVDA/NVIDIA Corporation (NVDA) CEO Jensen Huang On Q1 2023 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/NVDA/NVIDIA Corporation (NVDA) CEO Jensen Huang on Q2 2023 Results - Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/NVDA/NVIDIA Corporation (NVDA) Q3 2023 Earnings Call Transcript.pdf +0 -0
- EarningsTranscripts (PDF)/NVDA/NVIDIA Corporation (NVDA) Q3 2024 Earnings Call Transcript.pdf +0 -0
- app.py +13 -107
- earnings_app.py +186 -0
- requirements.txt +4 -0
- talking_app.py +115 -0
EarningsTranscripts (PDF)/AAPL/Apple (AAPL) Q2 2023 Earnings Call Transcript.pdf
ADDED
Binary file (138 kB). View file
|
|
EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) CEO Tim Cook on Q1 2022 Results - Earnings Call Transcript.pdf
ADDED
Binary file (133 kB). View file
|
|
EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) CEO Tim Cook on Q2 2022 Results - Earnings Call Transcript.pdf
ADDED
Binary file (140 kB). View file
|
|
EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) Q3 2023 Earnings Call Transcript.pdf
ADDED
Binary file (137 kB). View file
|
|
EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) Q4 2022 Earnings Call Transcript.pdf
ADDED
Binary file (144 kB). View file
|
|
EarningsTranscripts (PDF)/AAPL/Apple Inc. (AAPL) Q4 2023 Earnings Call Transcript.pdf
ADDED
Binary file (145 kB). View file
|
|
EarningsTranscripts (PDF)/AAPL/Apple, Inc. (AAPL) CEO Tim Cook on Q3 2022 Results - Earnings Call Transcript.pdf
ADDED
Binary file (135 kB). View file
|
|
EarningsTranscripts (PDF)/AAPL/Apple, Inc. (AAPL) Q1 2023 Earnings Call Transcript.pdf
ADDED
Binary file (133 kB). View file
|
|
EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) CEO Sundar Pichai on Q2 2022 Results - Earnings Call Transcript.pdf
ADDED
Binary file (135 kB). View file
|
|
EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q1 2023 Earnings Call Transcript.pdf
ADDED
Binary file (144 kB). View file
|
|
EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q2 2023 Earnings Call Transcript.pdf
ADDED
Binary file (142 kB). View file
|
|
EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q3 2022 Earnings Call Transcript.pdf
ADDED
Binary file (140 kB). View file
|
|
EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q3 2023 Earnings Call Transcript.pdf
ADDED
Binary file (133 kB). View file
|
|
EarningsTranscripts (PDF)/GOOG/Alphabet Inc. (GOOG) Q4 2022 Earnings Call Transcript.pdf
ADDED
Binary file (137 kB). View file
|
|
EarningsTranscripts (PDF)/GOOG/Alphabet Inc.'s (GOOG) CEO Sundar Pichai on Q1 2022 Results - Earnings Call Transcript.pdf
ADDED
Binary file (133 kB). View file
|
|
EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) CEO Satya Nadella on Q1 Fiscal 2022 Results - Earnings Call Transcript.pdf
ADDED
Binary file (150 kB). View file
|
|
EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) CEO Satya Nadella on Q4 2022 Results - Earnings Call Transcript.pdf
ADDED
Binary file (148 kB). View file
|
|
EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q1 2023 Earnings Call Transcript.pdf
ADDED
Binary file (154 kB). View file
|
|
EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q1 2024 Earnings Call Transcript.pdf
ADDED
Binary file (148 kB). View file
|
|
EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q2 2023 Earnings Call Transcript.pdf
ADDED
Binary file (149 kB). View file
|
|
EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q3 2023 Earnings Call Transcript.pdf
ADDED
Binary file (150 kB). View file
|
|
EarningsTranscripts (PDF)/MSFT/Microsoft Corporation (MSFT) Q4 2023 Earnings Call Transcript.pdf
ADDED
Binary file (155 kB). View file
|
|
EarningsTranscripts (PDF)/MSFT/Microsoft Corporation's (MSFT) CEO Satya Nadella on Q2 2022 Results - Earnings Call Transcript.pdf
ADDED
Binary file (151 kB). View file
|
|
EarningsTranscripts (PDF)/MSFT/Microsoft's (MSFT) CEO Satya Nadella on Q3 2022 Results - Earnings Call Transcript.pdf
ADDED
Binary file (157 kB). View file
|
|
EarningsTranscripts (PDF)/NVDA/NVIDIA Corp. (NVDA) Q1 2024 Earnings Call Transcript.pdf
ADDED
Binary file (149 kB). View file
|
|
EarningsTranscripts (PDF)/NVDA/NVIDIA Corp. (NVDA) Q2 2024 Earnings Call Transcript.pdf
ADDED
Binary file (139 kB). View file
|
|
EarningsTranscripts (PDF)/NVDA/NVIDIA Corp. (NVDA) Q4 2023 Earnings Call Transcript.pdf
ADDED
Binary file (132 kB). View file
|
|
EarningsTranscripts (PDF)/NVDA/NVIDIA Corporation (NVDA) CEO Jensen Huang On Q1 2023 Results - Earnings Call Transcript.pdf
ADDED
Binary file (140 kB). View file
|
|
EarningsTranscripts (PDF)/NVDA/NVIDIA Corporation (NVDA) CEO Jensen Huang on Q2 2023 Results - Earnings Call Transcript.pdf
ADDED
Binary file (137 kB). View file
|
|
EarningsTranscripts (PDF)/NVDA/NVIDIA Corporation (NVDA) Q3 2023 Earnings Call Transcript.pdf
ADDED
Binary file (135 kB). View file
|
|
EarningsTranscripts (PDF)/NVDA/NVIDIA Corporation (NVDA) Q3 2024 Earnings Call Transcript.pdf
ADDED
Binary file (148 kB). View file
|
|
app.py
CHANGED
@@ -1,115 +1,21 @@
|
|
1 |
-
import chainlit as cl
|
2 |
-
from langchain.embeddings.openai import OpenAIEmbeddings
|
3 |
-
from langchain.document_loaders.csv_loader import CSVLoader
|
4 |
-
from langchain.embeddings import CacheBackedEmbeddings
|
5 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
6 |
-
from langchain.vectorstores import FAISS
|
7 |
-
from langchain.chains import RetrievalQA
|
8 |
-
from langchain.chat_models import ChatOpenAI
|
9 |
-
from langchain.storage import LocalFileStore
|
10 |
-
from langchain.prompts.chat import (
|
11 |
-
ChatPromptTemplate,
|
12 |
-
SystemMessagePromptTemplate,
|
13 |
-
HumanMessagePromptTemplate,
|
14 |
-
)
|
15 |
-
import chainlit as cl
|
16 |
|
17 |
-
|
|
|
18 |
|
19 |
-
|
20 |
-
# If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
21 |
-
# You can make inferences based on the context as long as it still faithfully represents the feedback.
|
22 |
|
23 |
-
|
24 |
-
Use the following pieces of context to answer the user's question.
|
25 |
-
Please respond as if you are "RoaringKitty" a Youtuber known for detailed posts and videos on social media platforms like Reddit (particularly the WallStreetBets subreddit) and YouTube, where he shared his investment strategies and analysis .
|
26 |
-
If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
27 |
-
You can make inferences based on the context as long as it still faithfully represents the feedback.
|
28 |
-
Example of your response should be:
|
29 |
-
|
30 |
-
```
|
31 |
-
The answer is foo
|
32 |
-
```
|
33 |
-
|
34 |
-
Begin!
|
35 |
-
----------------
|
36 |
-
{context}"""
|
37 |
-
|
38 |
-
messages = [
|
39 |
-
SystemMessagePromptTemplate.from_template(system_template),
|
40 |
-
HumanMessagePromptTemplate.from_template("{question}"),
|
41 |
-
]
|
42 |
-
prompt = ChatPromptTemplate(messages=messages)
|
43 |
-
chain_type_kwargs = {"prompt": prompt}
|
44 |
-
|
45 |
-
@cl.author_rename
|
46 |
-
def rename(orig_author: str):
|
47 |
-
diamond_char = u'\U0001F537'
|
48 |
-
phrase = diamond_char + " Diamond Hands " + diamond_char
|
49 |
-
rename_dict = {"RetrievalQA": phrase}
|
50 |
-
return rename_dict.get(orig_author, orig_author)
|
51 |
|
52 |
@cl.on_chat_start
|
53 |
-
async def
|
54 |
-
|
55 |
-
await
|
56 |
-
|
57 |
-
# build FAISS index from csv
|
58 |
-
loader = CSVLoader(file_path="./data/roaringkitty.csv", source_column="Link")
|
59 |
-
data = loader.load()
|
60 |
-
documents = text_splitter.transform_documents(data)
|
61 |
-
store = LocalFileStore("./cache/")
|
62 |
-
core_embeddings_model = OpenAIEmbeddings()
|
63 |
-
embedder = CacheBackedEmbeddings.from_bytes_store(
|
64 |
-
core_embeddings_model, store, namespace=core_embeddings_model.model
|
65 |
-
)
|
66 |
-
# make async docsearch
|
67 |
-
docsearch = await cl.make_async(FAISS.from_documents)(documents, embedder)
|
68 |
-
|
69 |
-
chain = RetrievalQA.from_chain_type(
|
70 |
-
ChatOpenAI(model="gpt-4", temperature=0, streaming=True),
|
71 |
-
chain_type="stuff",
|
72 |
-
return_source_documents=True,
|
73 |
-
retriever=docsearch.as_retriever(),
|
74 |
-
chain_type_kwargs = {"prompt": prompt}
|
75 |
-
)
|
76 |
-
|
77 |
-
msg.content = f"Index built!"
|
78 |
-
await msg.send()
|
79 |
-
|
80 |
-
cl.user_session.set("chain", chain)
|
81 |
-
|
82 |
|
83 |
@cl.on_message
|
84 |
-
async def main(message):
|
85 |
chain = cl.user_session.get("chain")
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
answer = res["result"]
|
93 |
-
source_elements = []
|
94 |
-
visited_sources = set()
|
95 |
-
|
96 |
-
# Get the documents from the user session
|
97 |
-
docs = res["source_documents"]
|
98 |
-
metadatas = [doc.metadata for doc in docs]
|
99 |
-
all_sources = [m["source"] for m in metadatas]
|
100 |
-
|
101 |
-
for source in all_sources:
|
102 |
-
if source in visited_sources:
|
103 |
-
continue
|
104 |
-
visited_sources.add(source)
|
105 |
-
# Create the text element referenced in the message
|
106 |
-
source_elements.append(
|
107 |
-
cl.Text(content="https://www.youtube.com/watch?" + source, name="Link to Video")
|
108 |
-
)
|
109 |
-
|
110 |
-
if source_elements:
|
111 |
-
answer += f"\nSources: {', '.join([e.content.decode('utf-8') for e in source_elements])}"
|
112 |
-
else:
|
113 |
-
answer += "\nNo sources found"
|
114 |
-
|
115 |
-
await cl.Message(content=answer, elements=source_elements).send()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
+
# Chainlit supplies the `cl.on_chat_start` / `cl.on_message` decorators and
# the `cl.Message` / `cl.user_session` objects used by the handlers below.
# Fixed: the module is `chainlit`; `chainlet` is a misspelling.
import chainlit as cl
import sys

# Add the current working directory to the import path so the sibling
# module `earnings_app` can be imported.
sys.path.append(".")

from earnings_app import extract_information
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
@cl.on_chat_start
async def start():
    """Prepare a new chat session and greet the user.

    Builds the agent with ``extract_information``, keeps it in the user
    session under the "chain" key, then sends the welcome message.
    """
    agent = extract_information()
    cl.user_session.set("chain", agent)

    greeting = cl.Message(content="Welcome to Earnings chat!")
    await greeting.send()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
@cl.on_message
async def main(message: cl.Message):
    """Answer one user message with the agent stored in the session.

    Reads the object saved under the "chain" session key, sends it the
    user's text, and posts the reply back to the chat as a new message.

    Args:
        message: The incoming chat message.
    """
    chain = cl.user_session.get("chain")
    # Fixed: hand the agent the message text, not the whole message object.
    # `getattr` keeps this working if a plain string is delivered instead.
    query = getattr(message, "content", message)
    # NOTE(review): `chat` is a synchronous call inside an async handler;
    # consider an async variant if the UI stalls while the agent works.
    res = chain.chat(query)
    # `str(res)` is already the full reply text; joining its characters
    # with an empty separator produced the same string.
    await cl.Message(content=str(res)).send()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
earnings_app.py
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
# Imports
|
3 |
+
import asyncio
|
4 |
+
import os
|
5 |
+
import openai
|
6 |
+
|
7 |
+
from typing import List, Optional
|
8 |
+
from pydantic import BaseModel, Field
|
9 |
+
|
10 |
+
# from langchain.prompts import ChatPromptTemplate
|
11 |
+
from langchain.pydantic_v1 import BaseModel
|
12 |
+
# from langchain.utils.openai_functions import convert_pydantic_to_openai_function
|
13 |
+
from llama_index.tools import FunctionTool
|
14 |
+
from llama_index.vector_stores.types import (
|
15 |
+
VectorStoreInfo,
|
16 |
+
MetadataInfo,
|
17 |
+
ExactMatchFilter,
|
18 |
+
MetadataFilters,
|
19 |
+
)
|
20 |
+
from llama_index.agent import OpenAIAgent
|
21 |
+
from llama_index.retrievers import VectorIndexRetriever
|
22 |
+
from llama_index.query_engine import RetrieverQueryEngine
|
23 |
+
|
24 |
+
from typing import List, Tuple, Any
|
25 |
+
from pydantic import BaseModel, Field
|
26 |
+
from llama_index import load_index_from_storage
|
27 |
+
from llama_index import set_global_handler
|
28 |
+
import llama_index
|
29 |
+
from llama_index.embeddings import OpenAIEmbedding
|
30 |
+
from llama_index import ServiceContext
|
31 |
+
from llama_index.llms import OpenAI
|
32 |
+
from llama_index.ingestion import IngestionPipeline
|
33 |
+
from llama_index.node_parser import TokenTextSplitter
|
34 |
+
|
35 |
+
# Register the "wandb" global handler for llama_index under the project
# named in run_args.
set_global_handler("wandb", run_args={"project": "final-project-v1"})
# Keep a handle on the handler that was just registered; it is used further
# down in this module to load the stored index's storage context.
wandb_callback = llama_index.global_handler

from dotenv import load_dotenv
# Runs before the API key is read from the environment on the next statement.
load_dotenv()

# Raises KeyError at import time when OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

# Result count passed to the retriever built in auto_retrieve_fn.
top_k = 3
|
44 |
+
|
45 |
+
# (name, description) of every metadata field; each one is typed "str".
_METADATA_FIELDS = [
    ("title", "Title of the earnings call"),
    ("period", "Period of the earnings call"),
    ("ticker", "Ticker of the company"),
    ("year", "Year of the earnings call"),
    ("quarter", "Quarter of the earnings call"),
    ("path", "Path to the earnings call"),
]

# Description of the stored content and of its metadata fields.
vector_store_info = VectorStoreInfo(
    content_info="transcripts of earnings calls",
    metadata_info=[
        MetadataInfo(name=field_name, type="str", description=field_description)
        for field_name, field_description in _METADATA_FIELDS
    ],
)
|
78 |
+
|
79 |
+
# Argument schema for the auto-retrieval tool: a free-text query plus two
# parallel lists naming metadata fields and the values to match on them.
# Documented with comments rather than a class docstring so the model's
# generated schema is left untouched.
# NOTE: a class with this same name and the same fields is declared again
# later in this module; that later declaration is the one in effect when
# extract_information runs.
class AutoRetrieveModel(BaseModel):
    # Required: the question to run against the index.
    query: str = Field(..., description="natural language query string")
    # Required: metadata field names to filter on.
    filter_key_list: List[str] = Field(
        ..., description="List of metadata filter field names"
    )
    # Required: values paired by position with filter_key_list.
    filter_value_list: List[str] = Field(
        ...,
        description=(
            "List of metadata filter field values (corresponding to names specified in filter_key_list)"
        )
    )
|
90 |
+
|
91 |
+
# Embedding model handed to the service context below.
embed_model = OpenAIEmbedding()
# Chunk size shared by the service context and the token splitter.
chunk_size = 500

# Chat model handed to the service context below.
llm = OpenAI(
    temperature=0,
    model="gpt-4-1106-preview"
)

# Bundles the model, embedding model and chunk size; passed both when the
# index is loaded and when the query engine is built in auto_retrieve_fn.
service_context = ServiceContext.from_defaults(
    llm=llm,
    chunk_size=chunk_size,
    embed_model=embed_model,
)

text_splitter = TokenTextSplitter(
    chunk_size=chunk_size
)

# NOTE(review): neither text_splitter nor node_parser_pipeline is referenced
# again in the visible part of this module; confirm they are still needed.
node_parser_pipeline = IngestionPipeline(
    transformations=[text_splitter]
)

# Load the storage context from the named artifact through the wandb
# handler; runs at import time.
storage_context = wandb_callback.load_storage_context(
    artifact_url="llmop/final-project-v1/earnings-index:v0"
)

# Index queried by auto_retrieve_fn.
index = load_index_from_storage(storage_context, service_context=service_context)
|
118 |
+
|
119 |
+
def auto_retrieve_fn(
    query: str, filter_key_list: List[str], filter_value_list: List[str]
):
    """Auto retrieval function.

    Performs auto-retrieval from a vector database, and then applies a set of filters.

    Args:
        query: Natural-language question; an empty value falls back to
            the literal "Query".
        filter_key_list: Metadata field names to filter on.
        filter_value_list: Values matched against the field at the same
            position in ``filter_key_list``.

    Returns:
        The query engine's response converted to a string.
    """
    query = query or "Query"

    # Pair keys with values by position; zip stops at the shorter list.
    metadata_filters = MetadataFilters(
        filters=[
            ExactMatchFilter(key=k, value=v)
            for k, v in zip(filter_key_list, filter_value_list)
        ]
    )
    # Fixed: the retriever's result-count parameter is `similarity_top_k`.
    # A bare `top_k` keyword is not that parameter, so the module-level
    # setting was not being applied as the number of results.
    retriever = VectorIndexRetriever(
        index, filters=metadata_filters, similarity_top_k=top_k
    )
    query_engine = RetrieverQueryEngine.from_args(
        retriever, service_context=service_context
    )

    return str(query_engine.query(query))
|
140 |
+
|
141 |
+
# App
|
142 |
+
|
143 |
+
# Pydantic is an easy way to define a schema
|
144 |
+
# Argument schema passed as fn_schema when the retrieval tool is built in
# extract_information.
# NOTE: this rebinds the name AutoRetrieveModel, which is already declared
# with the same fields earlier in this module.
# Documented with comments rather than a class docstring so the model's
# generated schema is left untouched.
class AutoRetrieveModel(BaseModel):
    # Required: the question to run against the index.
    query: str = Field(..., description="natural language query string")
    # Required: metadata field names to filter on.
    filter_key_list: List[str] = Field(
        ..., description="List of metadata filter field names"
    )
    # Required: values paired by position with filter_key_list.
    filter_value_list: List[str] = Field(
        ...,
        description=(
            "List of metadata filter field values (corresponding to names specified in filter_key_list)"
        )
    )
|
155 |
+
|
156 |
+
# Main function to extract information
|
157 |
+
def extract_information():
    """Build and return the earnings-call agent.

    Wraps ``auto_retrieve_fn`` as a function tool whose arguments are
    described by ``AutoRetrieveModel`` and returns an ``OpenAIAgent`` that
    has that single tool.
    """
    # Make sure to use a recent model that supports tools
    # NOTE(review): no llm is passed to the agent here, so the module-level
    # `llm` is not what the agent itself is given; confirm this is intended.
    tools = [
        FunctionTool.from_defaults(
            fn=auto_retrieve_fn,
            name="earnings-transcripts",
            description="Earnings Bot",
            fn_schema=AutoRetrieveModel,
        )
    ]
    return OpenAIAgent.from_tools(tools=tools)
|
172 |
+
|
173 |
+
|
174 |
+
if __name__ == "__main__":
    # Manual smoke test: ask one fixed question synchronously, then once
    # more through an async wrapper. Everything here, including the
    # asyncio.run call, stays under the guard so that importing this module
    # (as app.py does) never executes it.
    text = "Who is the CEO of MSFT."
    chain = extract_information()
    print(str(chain.chat(text)))

    async def extract_information_async(message: str):
        """Return the agent's reply to *message* as a string."""
        # Fixed: answer the message that was passed in rather than the
        # outer `text` variable, which made the parameter dead.
        return str(chain.chat(message))

    async def main():
        """Run the async wrapper once on the sample question and print it."""
        res = await extract_information_async(text)
        print(res)

    asyncio.run(main())
|
requirements.txt
CHANGED
@@ -3,3 +3,7 @@ langchain==0.0.265
|
|
3 |
tiktoken==0.4.0
|
4 |
openai==0.27.8
|
5 |
faiss-cpu==1.7.4
|
|
|
|
|
|
|
|
|
|
3 |
tiktoken==0.4.0
|
4 |
openai==0.27.8
|
5 |
faiss-cpu==1.7.4
|
6 |
+
llama-index
|
7 |
+
cohere
|
8 |
+
wandb
|
9 |
+
pydantic==1.10.11
|
talking_app.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import chainlit as cl
|
2 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
3 |
+
from langchain.document_loaders.csv_loader import CSVLoader
|
4 |
+
from langchain.embeddings import CacheBackedEmbeddings
|
5 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
6 |
+
from langchain.vectorstores import FAISS
|
7 |
+
from langchain.chains import RetrievalQA
|
8 |
+
from langchain.chat_models import ChatOpenAI
|
9 |
+
from langchain.storage import LocalFileStore
|
10 |
+
from langchain.prompts.chat import (
|
11 |
+
ChatPromptTemplate,
|
12 |
+
SystemMessagePromptTemplate,
|
13 |
+
HumanMessagePromptTemplate,
|
14 |
+
)
|
15 |
+
import chainlit as cl
|
16 |
+
|
17 |
+
# Splitter applied to the loaded CSV documents in `init`.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# Earlier persona instructions, left commented out; not part of the prompt.
# Please respond as if you were Ken from the movie Barbie. Ken is a well-meaning but naive character who loves to Beach. He talks like a typical Californian Beach Bro, but he doesn't use the word "Dude" so much.
# If you don't know the answer, just say that you don't know, don't try to make up an answer.
# You can make inferences based on the context as long as it still faithfully represents the feedback.

# System prompt text; `{context}` is its only template placeholder.
system_template = """
Use the following pieces of context to answer the user's question.
Please respond as if you are "RoaringKitty" a Youtuber known for detailed posts and videos on social media platforms like Reddit (particularly the WallStreetBets subreddit) and YouTube, where he shared his investment strategies and analysis .
If you don't know the answer, just say that you don't know, don't try to make up an answer.
You can make inferences based on the context as long as it still faithfully represents the feedback.
Example of your response should be:

```
The answer is foo
```

Begin!
----------------
{context}"""

# System message first, then the human message carrying `{question}`.
messages = [
SystemMessagePromptTemplate.from_template(system_template),
HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate(messages=messages)
# NOTE: `init` builds an equal dict inline instead of using this name.
chain_type_kwargs = {"prompt": prompt}
|
44 |
+
|
45 |
+
@cl.author_rename
def rename(orig_author: str):
    """Map the "RetrievalQA" author label to the Diamond Hands display name.

    Any other author name is returned unchanged.
    """
    if orig_author == "RetrievalQA":
        gem = u'\U0001F537'
        return gem + " Diamond Hands " + gem
    return orig_author
|
51 |
+
|
52 |
+
@cl.on_chat_start
async def init():
    """Build the retrieval chain for a new chat session.

    Sends a progress message, loads ./data/roaringkitty.csv, splits it with
    the module-level ``text_splitter``, builds a FAISS index with embeddings
    cached under ./cache/, creates a RetrievalQA chain over it, sends a
    second status message, and stores the chain in the user session under
    the "chain" key.
    """
    status = cl.Message(content="Building Index...")
    await status.send()

    # Load the CSV (the "Link" column is the document source) and chunk it.
    rows = CSVLoader(
        file_path="./data/roaringkitty.csv", source_column="Link"
    ).load()
    chunks = text_splitter.transform_documents(rows)

    # Embeddings are cached in a local file store, namespaced by model name.
    cache_store = LocalFileStore("./cache/")
    base_embeddings = OpenAIEmbeddings()
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        base_embeddings, cache_store, namespace=base_embeddings.model
    )

    # FAISS.from_documents is synchronous; run it through cl.make_async.
    build_index = cl.make_async(FAISS.from_documents)
    vector_index = await build_index(chunks, cached_embedder)

    qa_chain = RetrievalQA.from_chain_type(
        ChatOpenAI(model="gpt-4", temperature=0, streaming=True),
        chain_type="stuff",
        return_source_documents=True,
        retriever=vector_index.as_retriever(),
        chain_type_kwargs={"prompt": prompt},
    )

    status.content = "Index built!"
    await status.send()

    cl.user_session.set("chain", qa_chain)
|
81 |
+
|
82 |
+
|
83 |
+
@cl.on_message
async def main(message):
    """Answer a user message and list the distinct sources that were used.

    Runs the session's chain on the message, then appends to the answer
    either a comma-separated list of video links built from the source
    documents' "source" metadata, or a "No sources found" line.
    """
    chain = cl.user_session.get("chain")

    handler = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=False, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    handler.answer_reached = True
    res = await chain.acall(message, callbacks=[handler])

    answer = res["result"]

    # De-duplicate sources while keeping first-seen order.
    unique_sources = dict.fromkeys(
        doc.metadata["source"] for doc in res["source_documents"]
    )
    source_elements = [
        cl.Text(content="https://www.youtube.com/watch?" + src, name="Link to Video")
        for src in unique_sources
    ]

    if not source_elements:
        answer += "\nNo sources found"
    else:
        # NOTE(review): `.decode` assumes each element's content is bytes at
        # this point; confirm against the installed chainlit version.
        links = ', '.join([element.content.decode('utf-8') for element in source_elements])
        answer += f"\nSources: {links}"

    reply = cl.Message(content=answer, elements=source_elements)
    await reply.send()
|