Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +26 -19
  3. requirements.txt +1 -1
README.md CHANGED
@@ -4,9 +4,9 @@ emoji: 📉
4
  colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.42.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
4
  colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 4.2.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,34 +1,40 @@
1
  import gradio as gr
2
- import random
3
- import time
4
  import boto3
5
  from botocore import UNSIGNED
6
  from botocore.client import Config
7
- import zipfile
 
 
 
 
8
 
9
  from langchain.llms import HuggingFaceHub
10
- model_id = HuggingFaceHub(repo_id="tiiuae/falcon-7b-instruct", model_kwargs={"temperature":0.1, "max_new_tokens":1024})
11
 
12
  from langchain.embeddings import HuggingFaceHubEmbeddings
13
  embeddings = HuggingFaceHubEmbeddings()
14
 
15
- from langchain.vectorstores import FAISS
16
 
17
  from langchain.chains import RetrievalQA
18
 
19
- s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
20
- s3.download_file('rad-rag-demos', 'vectorstores/faiss_db_ray.zip', './chroma_db/faiss_db_ray.zip')
21
- with zipfile.ZipFile('./chroma_db/faiss_db_ray.zip', 'r') as zip_ref:
22
- zip_ref.extractall('./chroma_db/')
23
 
24
- FAISS_INDEX_PATH='./chroma_db/faiss_db_ray'
25
- #embeddings = HuggingFaceHubEmbeddings("multi-qa-mpnet-base-dot-v1")
26
- embeddings = HuggingFaceHubEmbeddings()
27
- db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
28
- retriever = db.as_retriever(search_type = "mmr")
 
 
 
 
 
 
 
29
 
30
  global qa
31
- qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever)
32
 
33
 
34
  def add_text(history, text):
@@ -40,7 +46,8 @@ def bot(history):
40
  history[-1][1] = response['result']
41
  return history
42
 
43
- def infer(question):
 
44
  query = question
45
  result = qa({"query": query})
46
  return result
@@ -51,9 +58,9 @@ css="""
51
 
52
  title = """
53
  <div style="text-align: center;max-width: 700px;">
54
- <h1>Chat with the RAY Docs</h1>
55
- <p style="text-align: center;">The AI bot is here to help you with the RAY Documentation, <br />
56
- start asking questions about the open-source software </p>
57
  </div>
58
  """
59
 
 
1
  import gradio as gr
 
 
2
  import boto3
3
  from botocore import UNSIGNED
4
  from botocore.client import Config
5
+
6
+ from langchain.document_loaders import WebBaseLoader
7
+
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=350, chunk_overlap=10)
10
 
11
  from langchain.llms import HuggingFaceHub
12
+ model_id = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
13
 
14
  from langchain.embeddings import HuggingFaceHubEmbeddings
15
  embeddings = HuggingFaceHubEmbeddings()
16
 
17
+ from langchain.vectorstores import Chroma
18
 
19
  from langchain.chains import RetrievalQA
20
 
21
+ from langchain.prompts import ChatPromptTemplate
 
 
 
22
 
23
+ #web_links = ["https://www.databricks.com/","https://help.databricks.com","https://docs.databricks.com","https://kb.databricks.com/","http://docs.databricks.com/getting-started/index.html","http://docs.databricks.com/introduction/index.html","http://docs.databricks.com/getting-started/tutorials/index.html","http://docs.databricks.com/machine-learning/index.html","http://docs.databricks.com/sql/index.html"]
24
+ #loader = WebBaseLoader(web_links)
25
+ #documents = loader.load()
26
+
27
+ s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
28
+ s3.download_file('rad-rag-demos', 'vectorstores/chroma.sqlite3', './chroma_db/chroma.sqlite3')
29
+
30
+ db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
31
+ db.get()
32
+ #texts = text_splitter.split_documents(documents)
33
+ #db = Chroma.from_documents(texts, embedding_function=embeddings)
34
+ retriever = db.as_retriever()
35
 
36
  global qa
37
+ qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever, return_source_documents=True)
38
 
39
 
40
  def add_text(history, text):
 
46
  history[-1][1] = response['result']
47
  return history
48
 
49
+ def infer(question):
50
+
51
  query = question
52
  result = qa({"query": query})
53
  return result
 
58
 
59
  title = """
60
  <div style="text-align: center;max-width: 700px;">
61
+ <h1>Chat with PDF</h1>
62
+ <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
63
+ when everything is ready, you can start asking questions about the pdf ;)</p>
64
  </div>
65
  """
66
 
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  huggingface_hub
2
- faiss-cpu
3
  langchain
4
  boto3
5
  unstructured
 
1
  huggingface_hub
2
+ chromadb
3
  langchain
4
  boto3
5
  unstructured