Spaces: Running on Zero
Update app.py
app.py CHANGED

@@ -172,6 +172,80 @@ EOS_TOKEN = '</s>'
 SYSTEM_PROMPT_1 = """You are a helpful, respectful, honest and safe AI assistant built by Alibaba Group."""
 
 
+
+# ######### RAG PREPARE
+RAG_CURRENT_FILE, RAG_EMBED, RAG_CURRENT_VECTORSTORE = None, None, None
+
+RAG_EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+
+
+def load_embeddings():
+    global RAG_EMBED
+    if RAG_EMBED is None:
+        from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings
+        print(f'Loading embeddings: {RAG_EMBED_MODEL_NAME}')
+        RAG_EMBED = HuggingFaceEmbeddings(model_name=RAG_EMBED_MODEL_NAME, model_kwargs={'trust_remote_code': True})
+    else:
+        print(f'RAG_EMBED ALREADY EXISTS: {RAG_EMBED_MODEL_NAME}: {RAG_EMBED=}')
+    return RAG_EMBED
+
+
+def get_rag_embeddings():
+    return load_embeddings()
+
+_ = get_rag_embeddings()
+
+RAG_CURRENT_VECTORSTORE = None
+
+def load_document_split_vectorstore(file_path):
+    global RAG_CURRENT_FILE, RAG_EMBED, RAG_CURRENT_VECTORSTORE
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings
+    from langchain_community.vectorstores import Chroma, FAISS
+    from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
+    # assert RAG_EMBED is not None
+    splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=50)
+    if file_path.endswith('.pdf'):
+        loader = PyPDFLoader(file_path)
+    elif file_path.endswith('.docx'):
+        loader = Docx2txtLoader(file_path)
+    elif file_path.endswith('.txt'):
+        loader = TextLoader(file_path)
+    splits = loader.load_and_split(splitter)
+    RAG_CURRENT_VECTORSTORE = FAISS.from_texts(texts=[s.page_content for s in splits], embedding=get_rag_embeddings())
+    return RAG_CURRENT_VECTORSTORE
+
+
+def docs_to_rag_context(docs: List[str]):
+    contexts = "\n".join([d.page_content for d in docs])
+    context = f"""### Begin document
+{contexts}
+### End document
+Answer the following query exclusively based on the information provided in the document above. \
+Remember to follow the language of the user query.
+"""
+    return context
+
+def maybe_get_doc_context(message, file_input, rag_num_docs: Optional[int] = 3):
+    global RAG_CURRENT_FILE, RAG_EMBED, RAG_CURRENT_VECTORSTORE
+    doc_context = None
+    if file_input is not None:
+        assert os.path.exists(file_input), f"not found: {file_input}"
+        if file_input == RAG_CURRENT_FILE:
+            # reuse
+            vectorstore = RAG_CURRENT_VECTORSTORE
+            print(f'Reuse vectorstore: {file_input}')
+        else:
+            vectorstore = load_document_split_vectorstore(file_input)
+            print(f'New vectorstore: {RAG_CURRENT_FILE} {file_input}')
+            RAG_CURRENT_FILE = file_input
+        docs = vectorstore.similarity_search(message, k=rag_num_docs)
+        doc_context = docs_to_rag_context(docs)
+    return doc_context
+
+# ######### RAG PREPARE
+
+
 # ============ CONSTANT ============
 # https://github.com/gradio-app/gradio/issues/884
 MODEL_NAME = "SeaLLM-7B"
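
The retrieval flow added above is: chunk the uploaded document into 1024-character pieces, embed them with all-MiniLM-L6-v2, index them in FAISS, then wrap the top-k matches in a "### Begin document ... ### End document" block. A minimal sketch of that flow using the helpers from this hunk outside of Gradio (it assumes langchain, langchain-community, faiss and sentence-transformers are installed; "manual.pdf" is a hypothetical local file):

# Sketch only, not part of app.py
query = "What is the warranty period?"
vectorstore = load_document_split_vectorstore("manual.pdf")   # load, split, embed, index
docs = vectorstore.similarity_search(query, k=3)              # top-3 most similar chunks
print(docs_to_rag_context(docs))                              # the context block prepended to the chat message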

@@ -771,7 +845,7 @@ def chat_response_stream_multiturn(
     presence_penalty: float,
     system_prompt: Optional[str] = SYSTEM_PROMPT_1,
     current_time: Optional[float] = None,
-    profile: Optional[gr.OAuthProfile] = None,
+    # profile: Optional[gr.OAuthProfile] = None,
 ) -> str:
     """
     gr.Number(value=temperature, label='Temperature (higher -> more random)'),

@@ -794,7 +868,8 @@ def chat_response_stream_multiturn(
     global llm, RES_PRINTED
     assert llm is not None
     assert system_prompt.strip() != '', f'system prompt is empty'
-    is_by_pass = False if profile is None else profile.username in BYPASS_USERS
+    # is_by_pass = False if profile is None else profile.username in BYPASS_USERS
+    is_by_pass = False
 
     tokenizer = llm.get_tokenizer()
     # force removing all

@@ -876,6 +951,32 @@ def chat_response_stream_multiturn(
 
 
 
+def chat_response_stream_rag_multiturn(
+    message: str,
+    history: List[Tuple[str, str]],
+    file_input: str,
+    temperature: float,
+    max_tokens: int,
+    # frequency_penalty: float,
+    # presence_penalty: float,
+    system_prompt: Optional[str] = SYSTEM_PROMPT_1,
+    current_time: Optional[float] = None,
+    rag_num_docs: Optional[int] = 3,
+):
+    message = message.strip()
+    frequency_penalty = FREQUENCE_PENALTY
+    presence_penalty = PRESENCE_PENALTY
+    if len(message) == 0:
+        raise gr.Error("The message cannot be empty!")
+    doc_context = maybe_get_doc_context(message, file_input, rag_num_docs=rag_num_docs)
+    if doc_context is not None:
+        message = f"{doc_context}\n\n{message}"
+    yield from chat_response_stream_multiturn(
+        message, history, temperature, max_tokens, frequency_penalty,
+        presence_penalty, system_prompt, current_time
+    )
+
+
 def debug_generate_free_form_stream(message):
     output = " This is a debugging message...."
     for i in range(len(output)):
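
The new handler keeps streaming intact by delegating with yield from: it only augments the inputs (fixed penalties, optional document context) and re-yields whatever the base generator produces. A self-contained illustration of that pattern (not from app.py; base_stream stands in for chat_response_stream_multiturn):

from typing import Iterator, Optional

def base_stream(message: str) -> Iterator[str]:
    # stand-in for the real token-streaming chat function
    for token in message.split():
        yield token + " "

def rag_stream(message: str, doc_context: Optional[str] = None) -> Iterator[str]:
    # augment the message, then re-yield the base generator unchanged
    if doc_context is not None:
        message = f"{doc_context}\n\n{message}"
    yield from base_stream(message)

print("".join(rag_stream("What is covered?", "### Begin document ... ### End document")))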

@@ -1450,6 +1551,61 @@ def create_chat_demo(title=None, description=None):
     return demo_chat
 
 
+def upload_file(file):
+    # file_paths = [file.name for file in files]
+    # return file_paths
+    return file.name
+
+def create_chat_demo_rag(title=None, description=None):
+    sys_prompt = SYSTEM_PROMPT_1
+    max_tokens = MAX_TOKENS
+    temperature = TEMPERATURE
+    frequence_penalty = FREQUENCE_PENALTY
+    presence_penalty = PRESENCE_PENALTY
+
+    # with gr.Blocks(title="RAG") as rag_demo:
+    additional_inputs = [
+        # gr.File(label='Upload Document', file_count='single', file_types=['pdf', 'docx', 'txt', 'json']),
+        gr.Textbox(value=None, label='Document path', lines=1, interactive=False),
+        gr.Number(value=temperature, label='Temperature (higher -> more random)'),
+        gr.Number(value=max_tokens, label='Max generated tokens (increase if want more generation)'),
+        # gr.Number(value=frequence_penalty, label='Frequency penalty (> 0 encourage new tokens over repeated tokens)'),
+        # gr.Number(value=presence_penalty, label='Presence penalty (> 0 encourage new tokens, < 0 encourage existing tokens)'),
+        gr.Textbox(value=sys_prompt, label='System prompt', lines=1, interactive=False),
+        gr.Number(value=0, label='current_time', visible=False),
+    ]
+
+
+    demo_rag_chat = gr.ChatInterface(
+        chat_response_stream_rag_multiturn,
+        chatbot=gr.Chatbot(
+            label=MODEL_NAME + "-RAG",
+            bubble_full_width=False,
+            latex_delimiters=[
+                { "left": "$", "right": "$", "display": False},
+                { "left": "$$", "right": "$$", "display": True},
+            ],
+            show_copy_button=True,
+        ),
+        textbox=gr.Textbox(placeholder='Type message', lines=1, max_lines=128, min_width=200),
+        submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
+        # ! consider preventing the stop button
+        # stop_btn=None,
+        title=title,
+        description=description,
+        additional_inputs=additional_inputs,
+        additional_inputs_accordion=gr.Accordion("Additional Inputs", open=True),
+        # examples=CHAT_EXAMPLES,
+        cache_examples=False
+    )
+    with demo_rag_chat:
+        upload_button = gr.UploadButton("Click to Upload document", file_types=['pdf', 'docx', 'txt', 'json'], file_count="single")
+        upload_button.upload(upload_file, upload_button, additional_inputs[0])
+
+    # return demo_chat
+    return demo_rag_chat
+
+
 
 def launch_demo():
     global demo, llm, DEBUG, LOG_FILE
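
The RAG tab feeds the document into the chat handler through a read-only Textbox: gr.UploadButton saves the file, upload_file returns its local path, and that path becomes the file_input argument of chat_response_stream_rag_multiturn. The same wiring as a standalone sketch (assuming Gradio 4.x; component names here are illustrative, not taken from app.py):

import gradio as gr

def upload_file(file):
    # UploadButton hands the handler a temp-file object; .name is its local path
    return file.name

with gr.Blocks() as wiring_demo:
    doc_path = gr.Textbox(label='Document path', lines=1, interactive=False)
    upload_button = gr.UploadButton("Click to Upload document",
                                    file_types=['pdf', 'docx', 'txt'], file_count="single")
    # the returned path fills the textbox, which the chat handler later reads
    upload_button.upload(upload_file, upload_button, doc_path)

# wiring_demo.launch()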

@@ -1544,18 +1700,29 @@ def launch_demo():
 
     if ENABLE_BATCH_INFER:
 
-        demo_file_upload = create_file_upload_demo()
+        # demo_file_upload = create_file_upload_demo()
 
     demo_free_form = create_free_form_generation_demo()
 
     demo_chat = create_chat_demo()
+    demo_chat_rag = create_chat_demo_rag()
     descriptions = model_desc
     if DISPLAY_MODEL_PATH:
         descriptions += f"<br> {path_markdown.format(model_path=model_path)}"
 
     demo = CustomTabbedInterface(
-        interface_list=[
-
+        interface_list=[
+            demo_chat,
+            demo_chat_rag,
+            demo_free_form,
+            # demo_file_upload,
+        ],
+        tab_names=[
+            "Chat Interface",
+            "RAG Chat Interface",
+            "Text completion",
+            # "Batch Inference",
+        ],
         title=f"{model_title}",
         description=descriptions,
     )

@@ -1582,7 +1749,7 @@ def launch_demo():
     if ENABLE_AGREE_POPUP:
         demo.load(None, None, None, _js=AGREE_POP_SCRIPTS)
 
-    login_btn = gr.LoginButton()
+    # login_btn = gr.LoginButton()
 
     demo.queue(api_open=False)
     return demo