import gradio as gr
from langchain_community.document_loaders import YoutubeLoader
from langchain_cohere import ChatCohere, CohereEmbeddings
from langchain import hub
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os

COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

# Chat model, RAG prompt from the LangChain hub, and transcript splitter
llm = ChatCohere(model="command-r", cohere_api_key=COHERE_API_KEY)
prompt = hub.pull("rlm/rag-prompt")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)


# Join retrieved chunks into a single context string for the prompt
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# Function to load YouTube transcripts
def get_youtube_details(video_url):
    print(video_url)
    loader = YoutubeLoader.from_youtube_url(str(video_url), add_video_info=False)
    docs = loader.load()
    print("Video transcripts loaded")
    return docs, loader


# Function to handle user messages and update the history
def user_message(message, history):
    return "", history + [[message, None]]


# Function to clear the vector store (optional, not used in this example)
def clear_vectorstore(vectorstore):
    vectorstore.delete_collection()
    return "Vector store cleared."


# Function to clear the text box and reset the state
def clear_textbox():
    return "", None, None


# Function to handle bot responses
def bot_message(history, docs):
    if docs is None:
        return history
    user_question = history[-1][0]

    # Split the transcript, embed the chunks, and build a retriever
    splits = text_splitter.split_documents(docs)
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=CohereEmbeddings(
            model="embed-english-light-v3.0", cohere_api_key=COHERE_API_KEY
        ),
    )
    retriever = vectorstore.as_retriever()

    # Retrieve relevant chunks, fill the prompt, and generate an answer
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    response = rag_chain.invoke(user_question)
    history[-1][1] = response
    return history


title = (
    """