import openai
import pinecone
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
import boto3
import os
from time import sleep
from dotenv import load_dotenv
import gradio as gr
# Load environment variables
load_dotenv()
# Load OpenAI and Pinecone API keys from environment variables
openai.api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
# Download the combined extracted text file from S3
s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_key,
    region_name='us-east-1'
)
bucket_name = 'amtrak-superliner-ai-poc' # Replace with your S3 bucket name
txt_file_name = 'combined_extracted_text.txt' # Name of the text file stored in S3
local_txt_path = f'/tmp/{txt_file_name}' # Temporary location to store the file locally
# Download the text file from S3
s3_client.download_file(bucket_name, txt_file_name, local_txt_path)
# Load the extracted text from the text file
with open(local_txt_path, 'r') as f:
    doc = f.read()
# Split the document into smaller chunks (increase chunk size as needed)
text_splitter = CharacterTextSplitter(separator='\n', chunk_size=2000, chunk_overlap=500)
docs = [Document(page_content=doc)]
split_docs = text_splitter.split_documents(docs)
# Initialize the HuggingFace sentence-transformers model for embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarco-distilbert-base-v4")
# Create embeddings for the document chunks
doc_embeddings = [embedding_model.embed_query(doc.page_content) for doc in split_docs]
# Initialize the Pinecone client
pc = pinecone.Pinecone(api_key=pinecone_api_key)
# Create Pinecone index if it doesn't exist
index_name = "amtrak-acela-ai-demo"
embedding_dim = 768  # Embedding size of msmarco-distilbert-base-v4
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=embedding_dim,
        metric="cosine",
        spec=pinecone.ServerlessSpec(cloud="aws", region="us-east-1")
    )
# Connect to the Pinecone index
index = pc.Index(index_name)
# Upload document embeddings to Pinecone with metadata
for i, doc in enumerate(split_docs):
    index.upsert(vectors=[(str(i), doc_embeddings[i], {'content': doc.page_content})])
# Set up conversation memory
memory = ConversationBufferMemory()
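# Note: this memory object is not referenced again in this script; chat history is
# passed to get_model_response by Gradio's ChatInterface instead.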
# Define a prompt template for retrieval-augmented generation (RAG)
RAG_PROMPT_TEMPLATE = '''
Here is some important context that can help inform the Human's question:
{context}
Human: {human_input}
Please provide a specific and accurate answer based on the provided context.
Assistant:
'''
PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
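# Note: PROMPT is defined here, but get_model_response below assembles its system/user
# messages inline rather than formatting this template.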
def get_model_response(human_input, chat_history=None):  # chat_history is supplied by Gradio's ChatInterface
    try:
        # Step 1: Embed the user input
        query_embedding = embedding_model.embed_query(human_input)

        # Step 2: Query Pinecone using the embedding vector
        search_results = index.query(
            vector=query_embedding,
            top_k=5,
            include_metadata=True  # Ensures metadata is included in the results
        )

        # Step 3: Extract relevant context (document content) from the search results
        context_list = []
        for ind, result in enumerate(search_results['matches']):
            document_content = result.get('metadata', {}).get('content', 'No content found')
            context_list.append(f"Document {ind+1}: {document_content}")

        # Combine the context into a single string
        context_string = '\n\n'.join(context_list)

        # Step 4: Call the OpenAI ChatCompletion API for a response
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Here is some context:\n{context_string}\n\nUser's question: {human_input}"}
        ]
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=400,
            temperature=0.7
        )

        # Extract and return the model's output
        output_text = response['choices'][0]['message']['content'].strip()
        return output_text

    except Exception as e:
        return f"Error invoking model: {str(e)}"
# Gradio ChatInterface
gr_interface = gr.ChatInterface(
    fn=get_model_response,
    title="Amtrak Acela RMM Maintenance Assistant",
    description="Ask questions related to the RMM documents."
)
# Launch the Gradio app on Hugging Face Spaces
gr_interface.launch()