amtam0 committed
Commit eaffd42
1 Parent(s): ccd397c

add new files

Dockerfile ADDED
@@ -0,0 +1,14 @@
+ FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
+ ENV DEBIAN_FRONTEND=noninteractive
+ # Install necessary dependencies
+ RUN apt-get update && apt-get install -y python3-pip ffmpeg
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Copy the app code and requirements file
+ COPY stt.py /app
+
+ # Install the app dependencies
+ #RUN pip3 install --no-cache-dir -r requirements.txt
+ RUN pip3 install faster-whisper==0.6.0 flask==2.3.2
app2.py ADDED
@@ -0,0 +1,231 @@
+ from llama_cpp import Llama
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS, Chroma
+ from faster_whisper import WhisperModel
+ import os
+ import gradio as gr
+ import torch
+ import base64
+ import json
+ import chromadb
+ import requests
+ import gc
+
+ GPU = torch.cuda.device_count() > 0
+ n_threads = os.cpu_count() // 2
+
+ def load_llm(model_name):
+     """Load (or reload) the llama.cpp model, releasing the previous one first."""
+     global llm
+     try:
+         del llm
+     except NameError:
+         pass
+     torch.cuda.empty_cache()
+     gc.collect()
+     llm = Llama(model_path=model_name,
+                 n_threads=n_threads, n_gpu_layers=80, n_ctx=3000)
+     return llm
+
+ def load_faiss_db():
+     new_db = FAISS.load_local("faiss_MH_c2000_o100", hf_embs)
+     return new_db
+
+ def load_chroma_db():
+     ABS_PATH = os.getcwd()  # os.path.dirname(os.path.abspath(__file__))
+     DB_DIR = os.path.join(ABS_PATH, "chroma_MH_c1000_o0")
+     print("DB_DIR", DB_DIR)
+     client_settings = chromadb.config.Settings(
+         chroma_db_impl="duckdb+parquet",
+         persist_directory=DB_DIR,
+         anonymized_telemetry=False
+     )
+     vectorstore = Chroma(
+         collection_name="langchain_store",
+         embedding_function=hf_embs,
+         client_settings=client_settings,
+         persist_directory=DB_DIR,
+     )
+     return vectorstore
+
+ def init_prompt_template(context, question):
+     # Earlier template variants are kept for reference; only the last
+     # (zephyr-style) assignment is returned.
+     prompt_template = f"""<s>[INST]
+ As a health insurance assistant, use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+ {context}
+ Question: {question}
+ Concise answer in French:
+ [/INST]"""
+     prompt_template = f"""As a health insurance assistant, use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+ {context}
+ Question: {question}
+ Concise answer in French:"""
+     prompt_template = f"""Answer the question based only on the following context:
+ {context}
+
+ Question: {question}
+
+ Answer in the following language: French
+ """
+     prompt_template = f"""<|system|>
+ Answer the question based only on the following context:
+ {context}</s>
+ <|user|>
+ {question}</s>
+ <|assistant|>
+ """
+     return prompt_template
+
+ def wav_to_base64(file_path):
+     base64_data = base64.b64encode(open(file_path, "rb").read()).decode("utf-8")
+     return base64_data
+
+ def search_llm(question, max_tokens=10, temp=0, k_chunks=1, top_k=40,
+                top_p=0.95):
+     results = {}
+     context = ""
+
+     new_db = new_db_faiss
+     # if db_type == "faiss":
+     #     new_db = new_db_faiss
+     # else:
+     #     new_db = new_db_chroma
+     docs = new_db.similarity_search_with_score(question,
+                                                k=int(k_chunks))
+     contexts = [el[0].page_content for el in docs]
+     scores = [el[1] for el in docs]
+     context = "\n".join(contexts)
+     score = sum(scores) / len(scores)
+     score = round(score, 3)
+     url = docs[0][0].metadata
+
+     prompt_template = init_prompt_template(context, question)
+
+     output = llm(prompt_template,
+                  max_tokens=int(max_tokens),
+                  stop=["Question:", "\n"],
+                  echo=True,
+                  temperature=temp,
+                  top_k=int(top_k),
+                  top_p=top_p)
+     # first_response = output["choices"][0]["text"].split("answer in French:")[-1].strip()
+     first_response = output["choices"][0]["text"].split("<|assistant|>")[-1].strip()
+     results["Response"] = first_response
+     # results["prompt_template"] = prompt_template
+     results["context"] = context
+     results["source"] = url
+     results["context_score"] = score
+     return results["Response"], results["source"], results["context"], results["context_score"]
+
+ def stt(path):
+     injson = {}
+     injson["data"] = wav_to_base64(path)
+     results = requests.post(url="http://0.0.0.0:5566/api",
+                             json=injson,
+                             verify=False)
+     transcription = results.json()["transcription"]
+     query = transcription if "?" in transcription else transcription + "?"
+     return query
+
+ def STT_LLM(path, max_tokens, temp, k_chunks, top_k, top_p, db_type=None):
+     """Transcribe the audio file, then run the retrieval + LLM pipeline on the transcription."""
+     query = stt(path)
+     Response, url, context, contextScore = search_llm(query, max_tokens, temp, k_chunks, top_k, top_p)
+     return query, Response, url["source"], context, str(contextScore)
+
+ def LLM(content, max_tokens, temp, k_chunks, top_k, top_p, db_type=None):
+     Response, url, context, contextScore = search_llm(content, max_tokens, temp, k_chunks, top_k,
+                                                       top_p)
+     url = url["source"]
+     return Response, url, context, str(contextScore)
+
+
+ embs_name = "sentence-transformers/all-mpnet-base-v2"
+ hf_embs = HuggingFaceEmbeddings(model_name=embs_name,
+                                 model_kwargs={"device": "cuda"})
+ new_db_faiss = load_faiss_db()
+ new_db_chroma = load_chroma_db()
+ ### Load models
+ # stt (loaded for completeness; transcription actually goes through the stt.py service)
+ wspr = WhisperModel("small", device="cuda" if GPU else "cpu", compute_type="int8")
+ # llm
+ # model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
+ model_name = "zephyr-7b-beta.Q4_K_M.gguf"
+
+ llm = load_llm(model_name)
+
+ demo = gr.Blocks()
+ with demo:
+     with gr.Tab(model_name):
+         with gr.Row():
+             with gr.Column():
+                 with gr.Box():
+                     content = gr.Text(label="Posez votre question")
+                     audio_path = gr.Audio(source="microphone",
+                                           format="mp3",
+                                           type="filepath",
+                                           label="Posez votre question (Whisper-small)")
+                 with gr.Row():
+                     max_tokens = gr.Number(label="Max_tokens", value=100, maximum=1000, minimum=1)
+                     temp = gr.Number(label="Temperature", value=0.1, maximum=1.0, minimum=0.0, step=0.1)
+                     k_chunks = gr.Number(label="k_chunks", value=2, maximum=5, minimum=1)
+                     top_k = gr.Number(label="top_k", value=100, maximum=1000, minimum=1)
+                     top_p = gr.Number(label="top_p", value=0.95, maximum=1.0, minimum=0.0)
+                 # with gr.Box():
+                 #     db_type = gr.Dropdown(choices=["faiss", "chromadb"], label="Vector DB", value="faiss")
+                 #     # llm_name = gr.Dropdown(choices=["vicuna-7b-v1.3.ggmlv3.q4_1.bin",
+                 #     #                                 "vicuna-7b-v1.3.ggmlv3.q5_1.bin"],
+                 #     #                        label="llm", value="vicuna-7b-v1.3.ggmlv3.q4_1.bin")
+                 #     b3 = gr.Button("update model")
+                 #     # b3.click(load_llm, inputs=llm_name, outputs=None)
+             with gr.Column():
+                 # transcription = gr.Text(label="transcription")
+                 Response = gr.Text(label="Réponse")
+                 url = gr.Text(label="url source")
+                 context = gr.Text(label="contexte (chunks)")
+                 contextScore = gr.Text(label="contexte score (L2 distance)")
+         with gr.Box():
+             b2 = gr.Button("reconnaissance vocale")
+             b1 = gr.Button("search llm")
+         b1.click(LLM, inputs=[content, max_tokens, temp, k_chunks, top_k, top_p],  # db_type
+                  outputs=[Response, url, context, contextScore])
+         b2.click(stt, inputs=audio_path, outputs=content)
+
+     # with gr.Tab("gptq"):
+     #     with gr.Row():
+     #         with gr.Column():
+     #             with gr.Box():
+     #                 content = gr.Text(label="Posez votre question")
+     #                 audio_path = gr.Audio(source="microphone",
+     #                                       format="mp3",
+     #                                       type="filepath",
+     #                                       label="Posez votre question (Whisper-small)")
+     #             with gr.Row():
+     #                 max_tokens = gr.Number(label="Max_tokens", value=100, maximum=1000, minimum=1)
+     #                 temp = gr.Number(label="Temperature", value=0.1, maximum=1.0, minimum=0.0)
+     #                 k_chunks = gr.Number(label="k_chunks", value=2, maximum=3, minimum=1)
+     #                 top_k = gr.Number(label="top_k", value=100, maximum=1000, minimum=1)
+     #                 top_p = gr.Number(label="top_p", value=0.95, maximum=1.0, minimum=0.0)
+     #             with gr.Box():
+     #                 db_type = gr.Dropdown(choices=["faiss", "chromadb"], label="Vector DB", value="faiss")
+     #                 llm_name = gr.Dropdown(choices=["llama-2-7b.ggmlv3.q4_1.bin",
+     #                                                 "vicuna-7b-v1.3.ggmlv3.q4_1.bin"],
+     #                                        label="llm", value="llama-2-7b.ggmlv3.q4_1.bin")
+     #                 b3 = gr.Button("update model")
+     #                 # b3.click(stt, inputs=llm_name, outputs=None)
+     #         with gr.Column():
+     #             # transcription = gr.Text(label="transcription")
+     #             Response = gr.Text(label="Réponse")
+     #             url = gr.Text(label="url source")
+     #             context = gr.Text(label="contexte (chunks)")
+     #             contextScore = gr.Text(label="contexte score (L2 distance)")
+     #         with gr.Box():
+     #             b2 = gr.Button("reconnaissance vocale")
+     #             b1 = gr.Button("search llm")
+     #         b1.click(LLM, inputs=[content, max_tokens, temp, k_chunks, top_k, top_p, db_type],
+     #                  outputs=[Response, url, context, contextScore])
+     #         b2.click(stt, inputs=audio_path, outputs=content)
+
+ if __name__ == "__main__":
+     demo.launch(share=True, enable_queue=True, show_api=True)
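
Note: the stt() helper in app2.py assumes the Flask service from stt.py is already running on port 5566 (as in the Docker image above). A minimal client sketch of that round trip, assuming a local file named test.wav exists (an illustrative name, not part of the repo):

import base64
import requests

# Hypothetical client: base64-encode a local wav file and POST it to the
# stt.py service, then print the returned transcription.
with open("test.wav", "rb") as f:
    payload = {"data": base64.b64encode(f.read()).decode("utf-8")}
resp = requests.post("http://0.0.0.0:5566/api", json=payload)
print(resp.json()["transcription"])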
build_db.py ADDED
@@ -0,0 +1,96 @@
+ import requests
+ import xml.etree.ElementTree as ET
+ from langchain.document_loaders import UnstructuredURLLoader
+ from langchain.vectorstores import Chroma, FAISS
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
+ from langchain.embeddings import HuggingFaceEmbeddings
+ import chromadb
+ import logging
+ import os
+
+ # Works with a sitemap.xml URL only
+
+ # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+ # Params to edit
+ Chunk_size = 2000
+ Chunk_overlap = 100
+ Sitemap_url = "https://www.malakoffhumanis.com/sitemap.xml"
+
+ def langchain_web_scraper(sitemap_url, chunk_size=1000, chunk_overlap=100):
+     """Fetch a sitemap, load every listed page and split the text into chunks."""
+     # Fetch the sitemap.xml file
+     response = requests.get(sitemap_url)
+     tree = ET.fromstring(response.content)
+     # Extract URLs from the sitemap
+     urls = []
+     for url in tree.findall("{http://www.sitemaps.org/schemas/sitemap/0.9}url"):
+         loc = url.find("{http://www.sitemaps.org/schemas/sitemap/0.9}loc").text
+         # if "" in loc:
+         urls.append(loc)
+     print("len(urls)", len(urls))
+     # scraping
+     loaders = UnstructuredURLLoader(urls=urls)
+     data = loaders.load()
+
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
+                                                    chunk_overlap=chunk_overlap
+                                                    # separators=[" ", "\n"]
+                                                    )
+
+     documents = text_splitter.split_documents(data)
+     return documents
+
+ def store_vdb_faiss(documents=[], hf_embs=None, save_path="faiss_MHCOM"):
+     """Embed the documents and persist them in a local FAISS index."""
+     db = FAISS.from_documents(documents, hf_embs)
+     db.save_local(save_path)
+
+ def store_vdb_chroma(documents=[], hf_embs=None, save_path="chroma_MHCOM"):
+     """Embed the documents and persist them in a local Chroma collection."""
+     ABS_PATH = os.path.dirname(os.path.abspath(__file__))
+     DB_DIR = os.path.join(ABS_PATH, save_path)
+
+     client_settings = chromadb.config.Settings(
+         chroma_db_impl="duckdb+parquet",
+         persist_directory=DB_DIR,
+         anonymized_telemetry=False
+     )
+     vectorstore = Chroma(
+         collection_name="langchain_store",
+         embedding_function=hf_embs,
+         client_settings=client_settings,
+         persist_directory=DB_DIR,
+     )
+     vectorstore.add_documents(documents=documents, embedding=hf_embs)
+     vectorstore.persist()
+
+
+ def main():
+     print("scraping website")
+     documents = langchain_web_scraper(sitemap_url=Sitemap_url,
+                                       chunk_size=Chunk_size,
+                                       chunk_overlap=Chunk_overlap)
+     # store in vector DB FAISS
+     print("load embeddings")
+     embeddings_model_name = "sentence-transformers/all-mpnet-base-v2"
+     hf_embs = HuggingFaceEmbeddings(model_name=embeddings_model_name,
+                                     model_kwargs={"device": "cuda"})
+
+     print("storing chunks in vector db")
+     store_vdb_faiss(documents=documents,
+                     hf_embs=hf_embs,
+                     save_path="faiss_MH_c{}_o{}".format(str(Chunk_size),
+                                                         str(Chunk_overlap)))
+
+     # store_vdb_chroma(documents=documents,
+     #                  hf_embs=hf_embs,
+     #                  save_path="chroma_MH_c{}_o{}".format(str(Chunk_size),
+     #                                                       str(Chunk_overlap)))
+
+ if __name__ == '__main__':
+     main()
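
Note: once build_db.py has persisted the index, it can be reloaded the same way app2.py does. A minimal sketch, assuming the default Chunk_size/Chunk_overlap above and the same embedding model; the query string is only an example:

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Reload the persisted index (name matches Chunk_size=2000, Chunk_overlap=100)
# and run a similarity search over the scraped pages.
hf_embs = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
db = FAISS.load_local("faiss_MH_c2000_o100", hf_embs)
for doc, score in db.similarity_search_with_score("example question about coverage", k=2):
    print(round(score, 3), doc.metadata.get("source"))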
downld_models_local.py ADDED
@@ -0,0 +1,11 @@
+ from huggingface_hub import hf_hub_download
+
+ repo_id = "TheBloke/zephyr-7B-beta-GGUF"
+ model_name = "zephyr-7b-beta.Q4_K_M.gguf"
+
+ local_dir = "./"  # "/mnt/ssd1/MH/AMINE/NLPBANK/localchatbot"
+
+ hf_hub_download(repo_id=repo_id,
+                 filename=model_name,
+                 local_dir=local_dir,
+                 local_dir_use_symlinks=False)
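
Note: the downloaded GGUF file is the one app2.py loads. A minimal sketch of using it directly with llama-cpp-python, mirroring the parameters of load_llm() in app2.py; the prompt text is only an example:

from llama_cpp import Llama

# Load the downloaded model from the current directory and run one completion.
llm = Llama(model_path="./zephyr-7b-beta.Q4_K_M.gguf", n_ctx=3000, n_gpu_layers=80)
out = llm("<|system|>\nYou are a helpful assistant.</s>\n<|user|>\nHello</s>\n<|assistant|>\n",
          max_tokens=64, temperature=0.1)
print(out["choices"][0]["text"])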
faiss_MH_c2000_o100/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8560a800adb7152772c1dd3041e45e0c6842a89b3f8d4c8a5629596de225d519
+ size 11461677
faiss_MH_c2000_o100/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:93516c28a21f6317fe230a2f92a21e6de8f6fb628a65297cd8fca8c569c95501
+ size 6453479
stt.py ADDED
@@ -0,0 +1,46 @@
+ from flask import Flask, request
+ import json
+ import base64
+ from faster_whisper import WhisperModel
+ import tempfile
+ import os
+
+ def base64_to_wav(base64_data, save_path):
+     wav_data = base64.b64decode(base64_data)
+     with open(save_path, 'wb') as file:
+         file.write(wav_data)
+
+ app = Flask(__name__)
+
+ GPU = True
+ wspr = WhisperModel("small", device="cuda" if GPU else "cpu", compute_type="int8")
+
+ @app.route('/api', methods=['GET', 'POST'])
+ def STT():
+     if request.method == 'POST':
+         result = {}
+         # The client (stt() in app2.py) sends {"data": "<base64 audio>"} as JSON
+         audio_data = request.get_json()["data"]
+
+         # Create a unique filename in the temporary directory
+         temp_dir = tempfile.gettempdir()
+         temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', dir=temp_dir, delete=False)
+         save_path = temp_file.name
+         # save_path = "temp.wav"
+         base64_to_wav(audio_data, save_path)
+
+         segments, info = wspr.transcribe(save_path, beam_size=5, language="fr")
+         texts = [el.text.strip() for el in segments]
+         transcription = " ".join(texts)  # join all segments, not only the first
+         result["transcription"] = transcription
+         if os.path.exists(save_path):
+             os.remove(save_path)
+         return result
+     elif request.method == 'GET':
+         # Simple health check
+         return "API is working correctly!"
+
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=5566, debug=True)
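
Note: the GET branch gives a quick way to verify the service is up; a one-line check from Python, assuming the default host and port:

import requests

# Expect "API is working correctly!" if the Flask service is running on port 5566.
print(requests.get("http://0.0.0.0:5566/api").text)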