import openai
import pinecone
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
import boto3
import os
from time import sleep
from dotenv import load_dotenv
import gradio as gr

# Load environment variables
load_dotenv()

# Load OpenAI, Pinecone, and AWS credentials from environment variables
openai.api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")

# Set up the S3 client used to fetch the combined extracted text file
s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_key,
    region_name='us-east-1'
)
bucket_name = 'amtrak-superliner-ai-poc'  # Replace with your S3 bucket name
txt_file_name = 'combined_extracted_text.txt'  # Name of the text file stored in S3
local_txt_path = f'/tmp/{txt_file_name}'  # Temporary location to store the file locally

# Download the text file from S3
s3_client.download_file(bucket_name, txt_file_name, local_txt_path)

# Load the extracted text from the text file
with open(local_txt_path, 'r') as f:
    doc = f.read()

# Split the document into smaller chunks (increase chunk size as needed)
text_splitter = CharacterTextSplitter(separator='\n', chunk_size=2000, chunk_overlap=500)
docs = [Document(page_content=doc)]
split_docs = text_splitter.split_documents(docs)

# Initialize the Hugging Face MS MARCO DistilBERT model for embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarco-distilbert-base-v4")

# Create embeddings for the document chunks
doc_embeddings = [embedding_model.embed_query(doc.page_content) for doc in split_docs]

# Initialize the Pinecone client
pc = pinecone.Pinecone(api_key=pinecone_api_key)

# Create the Pinecone index if it doesn't already exist
index_name = "amtrak-acela-ai-demo"
embedding_dim = 768  # Output dimension of msmarco-distilbert-base-v4
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=embedding_dim,
        metric="cosine",
        spec=pinecone.ServerlessSpec(cloud="aws", region="us-east-1")
    )

# Connect to the Pinecone index
index = pc.Index(index_name)

# Upload document embeddings to Pinecone with the chunk text stored as metadata
for i, doc in enumerate(split_docs):
    index.upsert(vectors=[(str(i), doc_embeddings[i], {'content': doc.page_content})])

# Set up conversation memory (note: not currently wired into get_model_response)
memory = ConversationBufferMemory()

# Define a prompt template for retrieval-augmented generation (RAG)
RAG_PROMPT_TEMPLATE = '''
Here is some important context that can help inform the Human's question:

{context}

Human: {human_input}

Please provide a specific and accurate answer based on the provided context.
Assistant: '''

# Prompt object built from the template (note: get_model_response builds its own messages below)
PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

def get_model_response(human_input, chat_history=None):  # chat_history is supplied by gr.ChatInterface
    try:
        # Step 1: Embed the user input
        query_embedding = embedding_model.embed_query(human_input)

        # Step 2: Query Pinecone using the embedding vector
        search_results = index.query(
            vector=query_embedding,
            top_k=5,
            include_metadata=True  # Ensures metadata is included in the results
        )

        # Step 3: Extract relevant context (actual document content) from the search results
        context_list = []
        for ind, result in enumerate(search_results['matches']):
            document_content = result.get('metadata', {}).get('content', 'No content found')
            context_list.append(f"Document {ind + 1}: {document_content}")

        # Combine the retrieved chunks into a single context string
        context_string = '\n\n'.join(context_list)

        # Step 4: Call the OpenAI ChatCompletion API for a response
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Here is some context:\n{context_string}\n\nUser's question: {human_input}"}
        ]

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=400,
            temperature=0.7
        )

        # Extract and return the model's output
        output_text = response['choices'][0]['message']['content'].strip()
        return output_text

    except Exception as e:
        return f"Error invoking model: {str(e)}"

# Gradio ChatInterface
gr_interface = gr.ChatInterface(
    fn=get_model_response,
    title="Amtrak Acela RMM Maintenance Assistant",
    description="Ask questions related to the RMM documents."
)

# Launch the Gradio app on Hugging Face Spaces
gr_interface.launch()
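
# ---------------------------------------------------------------------------
# Dependency note (an assumption, not part of the original script): the calls
# above target specific library generations, so a requirements file along the
# lines of the sketch below is assumed; pin exact versions to whatever you
# have verified locally.
#
#   openai<1.0              # openai.ChatCompletion.create was removed in openai>=1.0
#   pinecone-client>=3.0    # provides pinecone.Pinecone and pinecone.ServerlessSpec
#   langchain
#   langchain-community     # HuggingFaceEmbeddings import path used above
#   sentence-transformers   # backs the msmarco-distilbert-base-v4 embedding model
#   boto3
#   python-dotenv
#   gradio
# ---------------------------------------------------------------------------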