Spaces:

FridayMaster
/

CHATBOT1

Sleeping

App Files Files Community

FridayMaster committed on Aug 4, 2024

Commit

3d00632

verified ·

1 Parent(s): b4ca8bc

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -21

app.py CHANGED Viewed

@@ -1,32 +1,78 @@
-    import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-# Load the model and tokenizer
-model_name = 'FridayMaster/fine_tune_embedding'
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)  # Use the appropriate class
-# Define a function to generate responses
-def generate_response(prompt):
-    # Tokenize the input prompt
-    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
-    with torch.no_grad():
-        # Generate a response using the model
-        outputs = model.generate(inputs['input_ids'], max_length=150, num_return_sequences=1)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
-# Create a Gradio interface
 iface = gr.Interface(
     fn=generate_response,
-    inputs=gr.Textbox(label="Enter your message", placeholder="Type something here..."),
     outputs=gr.Textbox(label="Response"),
-    title="Chatbot Interface",
-    description="Interact with the fine-tuned chatbot model."
 )
-# Launch the Gradio app
 if __name__ == "__main__":
     iface.launch()

+import pandas as pd
+import fitz  # PyMuPDF for PDF extraction
+import spacy
+from langchain.vectorstores import FAISS
 import torch
+from transformers import AutoTokenizer, AutoModel
+import gradio as gr
+# Load and preprocess PDF text
+def extract_text_from_pdf(pdf_path):
+    text = ""
+    with fitz.open(pdf_path) as pdf_document:
+        for page_num in range(len(pdf_document)):
+            page = pdf_document.load_page(page_num)
+            text += page.get_text()
+    return text
+# Extract text from the PDF. This runs at import time, and the path is given
+# relative to the current working directory.
+pdf_text = extract_text_from_pdf('Getting_Started_with_Ubuntu_16.04.pdf')  # Replace with your PDF path
+# Convert the text to a DataFrame. The list holds a single string, so the whole
+# manual becomes one row (one "document") in the 'text' column.
+df = pd.DataFrame({'text': [pdf_text]})
+# Define your custom embedding model
+class CustomEmbeddingModel:
+    def __init__(self, model_name):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModel.from_pretrained(model_name)
+    def embed_text(self, text):
+        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
+        with torch.no_grad():
+            embeddings = self.model(**inputs).last_hidden_state.mean(dim=1)
+        return embeddings[0].numpy()
+# Module-level embedder shared by indexing and querying; created at import time.
+embedding_model = CustomEmbeddingModel('distilbert-base-uncased')  # Replace with your model name
+# Load Spacy model for preprocessing (also at import time).
+nlp = spacy.load("en_core_web_sm")
+def preprocess_text(text):
+    doc = nlp(text)
+    tokens = [token.lemma_.lower() for token in doc if token.text.lower() not in stopwords.words('english') and token.is_alpha]
+    return ' '.join(tokens)
+# Apply preprocessing and embedding
+df['text'] = df['text'].apply(preprocess_text)
+df['text_embeddings'] = df['text'].apply(lambda x: embedding_model.embed_text(x))
+# Create FAISS vector store
+documents = df['text'].tolist()
+embeddings = df['text_embeddings'].tolist()
+vector_store = FAISS.from_documents(documents, embeddings)
+# Function to generate a response
+def generate_response(query):
+    preprocessed_query = preprocess_text(query)
+    query_embedding = embedding_model.embed_text(preprocessed_query)
+    # Find the closest document in the vector store
+    distances, indices = vector_store.search(query_embedding, k=1)  # k=1 for the closest document
+    if indices:
+        response = documents[indices[0]]
+    else:
+        response = "No relevant information found."
     return response
+# Gradio interface
 iface = gr.Interface(
     fn=generate_response,
+    inputs=gr.Textbox(label="Enter your query", placeholder="Ask about Ubuntu..."),
     outputs=gr.Textbox(label="Response"),
+    title="Ubuntu Manual Chatbot",
+    description="Ask questions about the Ubuntu manual."
 )
 if __name__ == "__main__":
     iface.launch()