import logging
import os
import sys
import time

import nest_asyncio
import openai
import streamlit as st
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Allow nested event loops; LlamaIndex uses asyncio internally and
# Streamlit already runs a loop of its own.
nest_asyncio.apply()

# Read the OpenAI key from the OAI_KEY environment variable. The LlamaIndex
# OpenAI wrappers may look for OPENAI_API_KEY instead, so exporting that
# variable as well is the safer option.
openai.api_key = os.getenv("OAI_KEY")

# Global LlamaIndex settings. gpt-3.5-turbo-instruct is a completions-style
# model; a chat model such as gpt-3.5-turbo may pair better with the chat
# engine used below.
Settings.llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0.2)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-3-large", embed_batch_size=100
)

# basicConfig already attaches a stdout handler; adding a second
# StreamHandler here would print every log line twice.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

def get_docs(doc_path="./data/"):
    """Load every document found under doc_path."""
    documents = SimpleDirectoryReader(doc_path).load_data()
    return documents

PERSIST_DIR = "./storage/open_ai_embedding_data_large"

def get_vector_index():
    """Load the vector index from disk, or build and persist it on first run."""
    Settings.chunk_size = 512  # only affects index construction, not loading
    if os.path.isdir(PERSIST_DIR):
        print("Index already exists")
        storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
        cur_index = load_index_from_storage(storage_context)
    else:
        print("Index does not exist, creating new index")
        docs = get_docs()
        storage_context = StorageContext.from_defaults()
        cur_index = VectorStoreIndex.from_documents(
            docs, storage_context=storage_context
        )
        storage_context.persist(persist_dir=PERSIST_DIR)
    return cur_index
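
# Note: StorageContext.persist writes the docstore, index store, and vector
# store as JSON files under PERSIST_DIR; delete that directory to force a
# full rebuild from ./data/.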

def get_chat_engine(index):
    """Wrap an index in a chat engine (used by the optional CLI loop below)."""
    return index.as_chat_engine()
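
# Streamlit UI: everything below runs top-to-bottom on every interaction.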
st.set_page_config(
    page_title="Project BookWorm: Your own Librarian!",
    page_icon="🦙",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=None,
)
st.title("Project BookWorm: Your own Librarian!")
st.info("Use this app to get recommendations for books that your kids will love!", icon="📃")
if "messages" not in st.session_state.keys(): # Initialize the chat messages history
st.session_state.messages = [
{"role": "assistant", "content": "Ask me a question about children's books or movies!"}
]
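
# st.session_state persists across reruns, so the chat history accumulates
# between interactions.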

@st.cache_resource(show_spinner=False)
def load_data():
    return get_vector_index()

s_time = time.time()
index = load_data()
e_time = time.time()
print(f"It took {e_time - s_time:.2f} seconds to load the index")
if "chat_engine" not in st.session_state.keys(): # Initialize the chat engine
st.session_state.chat_engine = index.as_chat_engine(chat_mode="condense_plus_context", verbose=True)
if prompt := st.chat_input("Your question"): # Prompt for user input and save to chat history
st.session_state.messages.append({"role": "user", "content": prompt})
for message in st.session_state.messages: # Display the prior chat messages
with st.chat_message(message["role"]):
st.write(message["content"])
# If last message is not from assistant, generate a new response
if st.session_state.messages[-1]["role"] != "assistant":
with st.chat_message("assistant"):
with st.spinner("Thinking..."):
response = st.session_state.chat_engine.chat(prompt)
st.write(response.response)
message = {"role": "assistant", "content": response.response}
st.session_state.messages.append(message) # Add response to message history
# Optional CLI loop for testing outside Streamlit:
# if __name__ == "__main__":
#     index = get_vector_index()
#     chat_engine = get_chat_engine(index)
#     while True:
#         your_request = input("Your comment: ")
#         response = chat_engine.chat(your_request)
#         print(response)
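
# To launch the app (assuming this file is saved as app.py):
#     streamlit run app.py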