from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv, find_dotenv
import os
from pinecone import Pinecone, PodSpec

# Load GEMINI_API_KEY and PINECONE_API_KEY from a .env file
load_dotenv(find_dotenv())


class Chatbot:
    # Load the knowledge base and split it into chunks for embedding
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=4)
    docs = text_splitter.split_documents(documents)

    # Gemini embedding model (768-dimensional vectors)
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY")
    )

    # Connect to Pinecone
    pinecone = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
        # host='gcp-starter'
    )

    index_name = "thehexatechchatbot"

    # Create the index and upsert the documents on first run; otherwise reuse the existing index
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,
            spec=PodSpec(environment="gcp-starter")
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    # Gemini chat model used to generate answers
    llm = GoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key=os.getenv("GEMINI_API_KEY"))

    # Prompt that injects the retrieved context and the user's question
    template = """
    INSTRUCTION: Act as the personal assistant chatbot of The Hexatech, an IT startup that provides core hexa services: \
    design, deployment, web development, AI & ML, and software. This is a conversation with a user who wants their \
    query about The Hexatech answered. Use the CONTEXT to answer the QUESTION in a helpful manner. \
    If you don't know the answer, say you don't know. \
    Always follow general guardrails before generating any response. \
    Always keep the conversation in the context of The Hexatech. Keep your replies short, \
    compassionate, and informative. \
    Give the answer from the CONTEXT. \
    Help the user get their query solved, and try to increase engagement by promoting The Hexatech and its services.

    CONTEXT: {context}

    QUESTION: {question}

    ANSWER:
    """

    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"]
    )

    # Retrieval-augmented generation chain: retrieve context -> fill prompt -> LLM -> plain-text answer
    rag_chain = (
        {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
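
# Minimal usage sketch: assumes the Pinecone index has been populated and the API keys
# above are set in the environment. The question string is illustrative only.
bot = Chatbot()
answer = bot.rag_chain.invoke("What services does The Hexatech offer?")
print(answer)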