# Spaces: Sleeping
from langchain.text_splitter import CharacterTextSplitter | |
from langchain_community.document_loaders import TextLoader | |
from langchain.schema.runnable import RunnablePassthrough | |
from langchain.schema.output_parser import StrOutputParser | |
from langchain_pinecone import PineconeVectorStore | |
from langchain.prompts import PromptTemplate | |
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings | |
from dotenv import load_dotenv, find_dotenv | |
import os | |
from pinecone import Pinecone, PodSpec | |
# Pull settings from the .env file located by find_dotenv() into the process
# environment before anything below reads them through os.getenv/os.environ.
load_dotenv(dotenv_path=find_dotenv())
class Chatbot:
    """Retrieval-augmented question-answering pipeline for The Hexatech.

    All of the setup lives in the class body, so it runs once, when this
    ``class`` statement is executed: ``dataset.txt`` is read and chunked, the
    Pinecone index is created and populated (or re-opened if it already
    exists), and the Gemini LLM and prompt are wired into a chain.

    The finished chain is exposed as the class attribute ``rag_chain``; the
    intermediate objects (``docs``, ``embeddings``, ``docsearch``, ``llm``,
    ``prompt``, ...) remain available as class attributes as well.
    """

    # Read the knowledge base and cut it into chunks for embedding.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=4)
    docs = text_splitter.split_documents(documents)

    # task_type is deliberately left unset.  A fixed "retrieval_query" would
    # also be applied when the corpus chunks are embedded for indexing; with
    # no override, langchain-google-genai picks the document task type in
    # embed_documents() and the query task type in embed_query().
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    pinecone = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    index_name = "thehexatechchatbot"
    if index_name not in pinecone.list_indexes().names():
        # First run: create the index, then embed and upload every chunk.
        # NOTE(review): dimension=768 is presumably the vector length of the
        # embedding model above -- confirm if the model is ever changed.
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(
            docs, embeddings, index_name=index_name
        )
    else:
        # The index already exists: attach to it without re-uploading.
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    llm = GoogleGenerativeAI(
        model="gemini-1.5-flash-latest",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    # Assembled from adjacent literals so the spacing between sentences is
    # explicit and the CONTEXT / QUESTION / ANSWER labels each start their own
    # line instead of running on from the instruction paragraph.
    template = (
        "\n"
        "INSTRUCTION: Act as Personal Assistant chatbot of The Hexatech, an IT Startup "
        "that provides core hexa services: design, deployment, web development, "
        "ai & ml, software. this is conversation "
        "to a user who wants to get his query solved about The Hexatech. "
        "Use the CONTEXT to answer in a helpful manner to the QUESTION. "
        "If you don't know any ANSWER, say you don't know "
        "Always follow general guardrails before generating any response. "
        "Always try to keep the conversation in context to The Hexatech. "
        "Keep your replies short "
        "compassionate and informative. "
        "Give the answer from the CONTEXT "
        "You should help user to get his query solved and also try to increase "
        "engagement and promoting The Hexatech & its services.\n"
        "CONTEXT: {context}\n"
        "QUESTION: {question}\n"
        "ANSWER:\n"
    )
    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )

    # The retriever fills {context} from the vector store while the caller's
    # input is passed through unchanged as {question}; the model output is
    # then reduced to a plain string.
    rag_chain = (
        {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )