Spaces:

MedTiouti
/

SandHillRoadPodcast

Runtime error

App Files Files Community

Med Tiouti committed on Jan 13

Commit

5491a72

•

1 Parent(s): 1a66660

Test8326832

Browse files

Files changed (6) hide show

.gitattributes +1 -0
README.md +3 -0
app.py +162 -0
faiss_index_shl/index.faiss +3 -0
faiss_index_shl/index.pkl +3 -0
requirements.txt +9 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+faiss_index_shl/index.faiss filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -7,6 +7,9 @@ sdk: gradio
 sdk_version: 4.14.0
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 sdk_version: 4.14.0
 app_file: app.py
 pinned: false
+models:
+- sentence-transformers/all-MiniLM-L6-v2
+- daryl149/llama-2-7b-chat-hf
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,162 @@

+import gradio as gr
+# retrievers
+from langchain.chains import RetrievalQA
+import textwrap
+import time
+import torch
+import transformers
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+# models
+from langchain.llms import HuggingFacePipeline
+from InstructorEmbedding import INSTRUCTOR
+from langchain.embeddings import HuggingFaceInstructEmbeddings
+# prompts
+from langchain import PromptTemplate, LLMChain
+# vector stores
+from langchain.vectorstores import FAISS
+def get_model(model_name):
+    model_repo = 'daryl149/llama-2-7b-chat-hf'
+    tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_repo,
+        load_in_4bit=True,
+        device_map='auto',
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True,
+        trust_remote_code=True
+    )
+    max_len = 2048
+    return tokenizer,model,max_len
+tokenizer, model, max_len = get_model("llama2-13b")
+temperature = 0,
+top_p = 0.95,
+repetition_penalty = 1.15
+pipe = pipeline(
+    task = "text-generation",
+    model = model,
+    tokenizer = tokenizer,
+    pad_token_id = tokenizer.eos_token_id,
+    max_length = max_len,
+    temperature = temperature,
+    top_p = top_p,
+    repetition_penalty = repetition_penalty
+)
+llm = HuggingFacePipeline(pipeline = pipe)
+# similar passages
+k = 3
+embeddings_shl_path ="/content/faiss_index_shl"
+embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'
+### download embeddings model
+embeddings = HuggingFaceInstructEmbeddings(
+    model_name = embeddings_model_repo,
+    model_kwargs = {"device": "cuda"}
+)
+### load vector DB embeddings
+vectordb = FAISS.load_local(
+    embeddings_shl_path,
+    embeddings
+)
+# Prompt given to the LLM for every question. {context} and {question} are the
+# two placeholders declared as input variables of the PromptTemplate below.
+prompt_template = """
+Don't try to make up an answer, if you don't know just say that you don't know.
+Answer in the same language the question was asked.
+Don't mention in the answer the speaker just give the answer directly.
+Use only the following pieces of context to answer the question at the end.
+{context}
+Question: {question}
+Answer:"""
+# Template object later handed to the QA chain through chain_type_kwargs.
+PROMPT = PromptTemplate(
+    template = prompt_template,
+    input_variables = ["context", "question"]
+)
+retriever = vectordb.as_retriever(search_kwargs = {"k": 3, "search_type" : "similarity"})
+qa_chain = RetrievalQA.from_chain_type(
+    llm = llm,
+    chain_type = "stuff", # map_reduce, map_rerank, stuff, refine
+    retriever = retriever,
+    chain_type_kwargs = {"prompt": PROMPT},
+    return_source_documents = True,
+    verbose = False
+)
+def wrap_text_preserve_newlines(text, width=700):
+    # Split the input text into lines based on newline characters
+    lines = text.split('\n')
+    # Wrap each line individually
+    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
+    # Join the wrapped lines back together using newline characters
+    wrapped_text = '\n'.join(wrapped_lines)
+    return wrapped_text
+def process_llm_response(llm_response):
+    ans = wrap_text_preserve_newlines(llm_response['result'])
+    sources_used = ' \n'.join(
+        [
+            "<b> - " + source.metadata['source'].split('/')[-1][:-4] + "</b>"
+            for source in llm_response['source_documents']
+        ]
+    )
+    ans += "\n Sand Hill Road podcast episodes based on your question : \n" + sources_used
+    return ans,sources_used
+def llm_ans(query):
+    start = time.time()
+    llm_response = qa_chain(query)
+    ans,sources_used = process_llm_response(llm_response)
+    end = time.time()
+    time_elapsed = int(round(end - start, 0))
+    time_elapsed_str = f'\n\nTime elapsed: {time_elapsed} s'
+    return ans, sources_used ,time_elapsed_str
+def predict(message, history):
+    # output = message # debug mode
+    output = str(llm_ans(message)[0]).replace("\n", "<br/>")
+    return output
+demo = gr.ChatInterface(
+    predict,
+    title = f' Sand Hill Road Podcast Chatbot'
+)
+demo.queue()
+demo.launch(debug=True)

faiss_index_shl/index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:491d00dbb83399b1954976033a584b7f6d92c631d182662ae4ce6a7fbea4acb5
+size 4389933

faiss_index_shl/index.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b02757d1a2c734a71e2b08b4569007dfd66dab3fbec3d2590a0b63c55a7dabf7
+size 2337485

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+langchain
+faiss-gpu
+transformers
+InstructorEmbedding
+sentence_transformers
+accelerate
+bitsandbytes
+xformers
+einops