import openai
import pinecone
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
import boto3
import os
from time import sleep
from dotenv import load_dotenv
import gradio as gr
# Load environment variables
load_dotenv()
# Load OpenAI and Pinecone API keys from environment variables
openai.api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
# Download the combined extracted text file from S3
s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_key,
    region_name='us-east-1'
)
bucket_name = 'amtrak-superliner-ai-poc' # Replace with your S3 bucket name
txt_file_name = 'combined_extracted_text.txt' # Name of the text file stored in S3
local_txt_path = f'/tmp/{txt_file_name}' # Temporary location to store the file locally
# Download the text file from S3
s3_client.download_file(bucket_name, txt_file_name, local_txt_path)
# Load the extracted text from the text file
with open(local_txt_path, 'r') as f:
    doc = f.read()
# Split the document into smaller chunks (increase chunk size as needed)
text_splitter = CharacterTextSplitter(separator='\n', chunk_size=2000, chunk_overlap=500)
docs = [Document(page_content=doc)]
split_docs = text_splitter.split_documents(docs)
# Initialize the HuggingFace sentence-transformers model for embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarco-distilbert-base-v4")
# Create embeddings for the document chunks
doc_embeddings = [embedding_model.embed_query(doc.page_content) for doc in split_docs]
# Initialize the Pinecone client
pc = pinecone.Pinecone(api_key=pinecone_api_key)
# Create Pinecone index if it doesn't exist
index_name = "amtrak-acela-ai-demo"
embedding_dim = 768  # Embedding size of msmarco-distilbert-base-v4
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=embedding_dim,
        metric="cosine",
        spec=pinecone.ServerlessSpec(cloud="aws", region="us-east-1")
    )
# Connect to the Pinecone index
index = pc.Index(index_name)
# Upload document embeddings to Pinecone with metadata
for i, doc in enumerate(split_docs):
    index.upsert(vectors=[(str(i), doc_embeddings[i], {'content': doc.page_content})])
# Set up conversation memory
memory = ConversationBufferMemory()
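# Note: this memory object is not referenced again in this script; chat history is
# passed to get_model_response by Gradio's ChatInterface instead.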
# Define a prompt template for retrieval-augmented generation (RAG)
RAG_PROMPT_TEMPLATE = '''
Here is some important context that can help inform the Human's question:
{context}
Human: {human_input}
Please provide a specific and accurate answer based on the provided context.
Assistant:
'''
PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
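# Note: PROMPT is defined here, but get_model_response below assembles its system/user
# messages inline rather than formatting this template.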
def get_model_response(human_input, chat_history=None):  # chat_history is supplied by Gradio's ChatInterface
    try:
        # Step 1: Embed the user input
        query_embedding = embedding_model.embed_query(human_input)

        # Step 2: Query Pinecone using the embedding vector
        search_results = index.query(
            vector=query_embedding,
            top_k=5,
            include_metadata=True  # Ensures metadata is included in the results
        )

        # Step 3: Extract relevant context (document content) from the search results
        context_list = []
        for ind, result in enumerate(search_results['matches']):
            document_content = result.get('metadata', {}).get('content', 'No content found')
            context_list.append(f"Document {ind+1}: {document_content}")

        # Combine the context into a single string
        context_string = '\n\n'.join(context_list)

        # Step 4: Call the OpenAI ChatCompletion API for a response
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Here is some context:\n{context_string}\n\nUser's question: {human_input}"}
        ]
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=400,
            temperature=0.7
        )

        # Extract and return the model's output
        output_text = response['choices'][0]['message']['content'].strip()
        return output_text

    except Exception as e:
        return f"Error invoking model: {str(e)}"
# Gradio ChatInterface
gr_interface = gr.ChatInterface(
    fn=get_model_response,
    title="Amtrak Acela RMM Maintenance Assistant",
    description="Ask questions related to the RMM documents."
)
# Launch the Gradio app on Hugging Face Spaces
gr_interface.launch()