Spaces:

mlara
/

earnings-final

Sleeping

File size: 4,679 Bytes

# -*- coding: utf-8 -*-
# Imports
import asyncio
import os
import openai

from typing import List, Optional
# from pydantic import BaseModel, Field

# from langchain.prompts import ChatPromptTemplate
# from langchain.pydantic_v1 import BaseModel
# from langchain.utils.openai_functions import convert_pydantic_to_openai_function
from llama_index.tools import FunctionTool
from llama_index.vector_stores.types import (
    VectorStoreInfo,
    MetadataInfo,
    ExactMatchFilter,
    MetadataFilters,
)
from llama_index.agent import OpenAIAgent
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine

from typing import List, Tuple, Any
from pydantic import BaseModel, Field
from llama_index import load_index_from_storage
from llama_index import set_global_handler
import llama_index
from llama_index.embeddings import OpenAIEmbedding
from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index import GPTVectorStoreIndex

set_global_handler("wandb", run_args={"project": "final-project-v1"})
wandb_callback = llama_index.global_handler

from dotenv import load_dotenv
load_dotenv()

openai.api_key = os.environ['OPENAI_API_KEY']

top_k = 3

vector_store_info = VectorStoreInfo(
    content_info="transcripts of earnings calls",
    metadata_info=[MetadataInfo(
        name="title",
        type="str",
        description="Title of the earnings call",
    ),
    MetadataInfo(
        name="period",
        type="str",
        description="Period of the earnings call"
    ),
    MetadataInfo(
        name="ticker",
        type="str",
        description="Ticker of the company"
    ),
    MetadataInfo(
        name="year",
        type="str",
        description="Year of the earnings call"
    ),
    MetadataInfo(
        name="quarter",
        type="str",
        description="Quarter of the earnings call"
    ),
    MetadataInfo(
        name="path",
        type="str",
        description="Path to the earnings call"
    ),
])

class AutoRetrieveModel(BaseModel):
    query: str = Field(..., description="natural language query string")
    filter_key_list: List[str] = Field(
        ..., description="List of metadata filter field names"
    )
    filter_value_list: List[str] = Field(
        ...,
        description=(
            "List of metadata filter field values (corresponding to names specified in filter_key_list)"
        )
    )

embed_model = OpenAIEmbedding()
chunk_size = 500

llm = OpenAI(
    temperature=0,
    model="gpt-4" ### YOUR CODE HERE
)

service_context = ServiceContext.from_defaults(
    llm=llm,
    chunk_size=chunk_size,
    embed_model=embed_model,
)

index = GPTVectorStoreIndex.from_documents([], service_context=service_context)


# Main function to extract information
async def extract_information():
    # Make sure to use a recent model that supports tools

    storage_context = wandb_callback.load_storage_context(
        artifact_url="llmop/final-project-v1/earnings-index:v1"
    )

    index = load_index_from_storage(storage_context, service_context=service_context)

    def auto_retrieve_fn(
        query: str, filter_key_list: List[str], filter_value_list: List[str]
    ):
        """Auto retrieval function.

        Performs auto-retrieval from a vector database, and then applies a set of filters.

        """
        query = query or "Query"

        exact_match_filters = [
            ExactMatchFilter(key=k, value=v)
            for k, v in zip(filter_key_list, filter_value_list)
        ]
        retriever = VectorIndexRetriever(
            index, filters=MetadataFilters(filters=exact_match_filters), top_k=top_k
        )
        query_engine = RetrieverQueryEngine.from_args(retriever, service_context=service_context)

        response = query_engine.query(query)
        return str(response)
    
    description = f"""
        Who is the CEO of MSFT
        The vector database schema is given below:
        {vector_store_info.json()}
    """
    auto_retrieve_tool = FunctionTool.from_defaults(
        fn=auto_retrieve_fn,
        name="earnings-transcripts",
        description=description,
        fn_schema=AutoRetrieveModel
    )

    agent = OpenAIAgent.from_tools(
        tools=[auto_retrieve_tool],
    )

    return agent


# if __name__ == "__main__":
#     text = "Who is the CEO of MSFT."
#     chain = extract_information()
#     print(str(chain.chat(text)))

#     async def extract_information_async(message: str):
#         return str(chain.chat(text))

#     async def main():
#         res = await extract_information_async(text)
#         print(res)

    # asyncio.run(main())