Spaces:

FridayMaster
/

CHATBOT1

Sleeping

App Files Files Community

FridayMaster committed on Aug 4, 2024

Commit

1842c48

verified ·

1 Parent(s): 90198de

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -16

app.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import pandas as pd
 import fitz  # PyMuPDF for PDF extraction
 import spacy
-from langchain.vectorstores import FAISS
 import torch
 from transformers import AutoTokenizer, AutoModel
 import gradio as gr
-from langchain_community.vectorstores import FAISS
 # Load and preprocess PDF text
 def extract_text_from_pdf(pdf_path):
@@ -18,13 +18,12 @@ def extract_text_from_pdf(pdf_path):
     return text
 # Extract text from the PDF
-pdf_text = extract_text_from_pdf('Getting Started with Ubuntu 16.04.pdf')
 # Convert the text to a DataFrame
 df = pd.DataFrame({'text': [pdf_text]})
-# Define your custom embedding model
 class CustomEmbeddingModel:
     def __init__(self, model_name):
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -39,7 +38,7 @@ class CustomEmbeddingModel:
 embedding_model = CustomEmbeddingModel('distilbert-base-uncased')  # Replace with your model name
 # Load Spacy model for preprocessing
-nlp = spacy.load("en_core_web_sm")
 def preprocess_text(text):
     doc = nlp(text)
@@ -55,16 +54,15 @@ documents = df['text'].tolist()
 embeddings = df['text_embeddings'].tolist()
 vector_store = FAISS.from_documents(documents, embeddings)
 # Function to generate a response
-def generate_response(query):
-    preprocessed_query = preprocess_text(query)
-    query_embedding = embedding_model.embed_text(preprocessed_query)
-    # Find the closest document in the vector store
-    distances, indices = vector_store.search(query_embedding, k=1)  # k=1 for the closest document
-    if indices:
-        response = documents[indices[0]]
-    else:
-        response = "No relevant information found."
     return response
 # Gradio interface
@@ -79,3 +77,4 @@ iface = gr.Interface(
 if __name__ == "__main__":
     iface.launch()

 import pandas as pd
 import fitz  # PyMuPDF for PDF extraction
 import spacy
+from langchain.chains import ConversationalRetrievalChain
+from langchain.llms import OpenAI
+from langchain_community.vectorstores import FAISS  # Updated import
 import torch
 from transformers import AutoTokenizer, AutoModel
 import gradio as gr
 # Load and preprocess PDF text
 def extract_text_from_pdf(pdf_path):
     return text
 # Extract text from the PDF
+pdf_text = extract_text_from_pdf('Getting Started with Ubuntu 16.04.pdf')  # Ensure this path is correct
 # Convert the text to a DataFrame
 df = pd.DataFrame({'text': [pdf_text]})
+# Load the custom embedding model
 class CustomEmbeddingModel:
     def __init__(self, model_name):
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 embedding_model = CustomEmbeddingModel('distilbert-base-uncased')  # Replace with your model name
 # Load Spacy model for preprocessing
+nlp = spacy.load("en_core_web_sm")  # Ensure the model is installed
 def preprocess_text(text):
     doc = nlp(text)
 embeddings = df['text_embeddings'].tolist()
 vector_store = FAISS.from_documents(documents, embeddings)
+# Create LangChain model and chain
+llm_model = OpenAI('gpt-3.5-turbo')  # You can replace this with a different LLM if desired
+retriever = vector_store.as_retriever()
+chain = ConversationalRetrievalChain.from_llm(llm_model, retriever=retriever)
 # Function to generate a response
+def generate_response(prompt):
+    result = chain({"query": prompt})
+    response = result["result"]
     return response
 # Gradio interface
 if __name__ == "__main__":
     iface.launch()