Spaces:

Tuana
/

pubmed-qa-mixtral-haystack

Sleeping

App Files Files Community

Tuana committed on Jan 9, 2024

Commit

af1cf81

1 Parent(s): 3a8e87d

first commit

Browse files

Files changed (9) hide show

.github/workflows/hf_sync.yml +20 -0
README.md +10 -1
app.py +57 -0
requirements.txt +4 -0
utils/__init__.py +0 -0
utils/config.py +5 -0
utils/haystack.py +89 -0
utils/pubmed_fetcher.py +28 -0
utils/ui.py +53 -0

.github/workflows/hf_sync.yml ADDED Viewed

	@@ -0,0 +1,20 @@

+# Mirrors the main branch of this repository to the Hugging Face Space on every push.
+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      # Check out the complete history (fetch-depth: 0) together with LFS objects,
+      # since the next step pushes the whole branch to the Space.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          # Token is read from the repository secrets; it is never stored in the file.
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push --force https://Tuana:$HF_TOKEN@huggingface.co/spaces/Tuana/pubmed-qa-mixtral-haystack main

README.md CHANGED Viewed

	@@ -1 +1,10 @@
1	- ~~# pubmed-qa-mixtral-haystack~~

+---
+title: Ask PubMed
+emoji: 👩🏻‍⚕️
+colorFrom: pink
+colorTo: yellow
+sdk: streamlit
+sdk_version: 1.25.0
+app_file: app.py
+pinned: true
+---

app.py ADDED Viewed

	@@ -0,0 +1,57 @@

+from json import JSONDecodeError
+import logging
+from markdown import markdown
+import requests
+import streamlit as st
+from utils.haystack import query, start_haystack
+from utils.ui import reset_results, set_initial_state, sidebar
+set_initial_state()
+sidebar()
+st.write("# 🐤 What have they been posting about lately on Mastodon?")
+if st.session_state.get("H"):
+    pipeline = start_haystack(st.session_state.get("HUGGING_FACE_TOKEN"))
+    st.session_state["api_key_configured"] = True
+    search_bar, button = st.columns(2)
+    # Search bar
+    with search_bar:
+        question = st.text_input("Ask a question", on_change=reset_results)
+    with button:
+        st.write("")
+        st.write("")
+        run_pressed = st.button("Search posts (toots)")
+else:
+    st.write("Please provide your OpenAI Key to start using the application")
+    st.write("If you are using a smaller screen, open the sidebar from the top left to provide your OpenAI Key 🙌")
+if st.session_state.get("api_key_configured"):
+    run_query = (
+        run_pressed or username != st.session_state.username
+    )
+    # Get results for query
+    if run_query and username:
+        reset_results()
+        st.session_state.username = username
+        with st.spinner("🔎"):
+            try:
+                st.session_state.result = query(username, pipeline)
+            except JSONDecodeError as je:
+                st.error(
+                    "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
+                )
+            except Exception as e:
+                logging.exception(e)
+                st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
+    if st.session_state.result:
+        voice = st.session_state.result
+        st.write(voice['results'][0])

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+haystack-ai==2.0.0b2
+streamlit==1.25.0
+pymed
+markdown

utils/__init__.py ADDED Viewed

File without changes

utils/config.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import os
+from dotenv import load_dotenv
+load_dotenv()
+HUGGING_FACE_TOKEN = os.getenv('HUGGING_FACE_TOKEN')

utils/haystack.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import streamlit as st
+from haystack import Pipeline
+from pubmed_fetcher import PubMedFetcher
+from haystack.components.generators import HuggingFaceTGIGenerator
+from haystack.components.builders.prompt_builder import PromptBuilder
+# def start_keyword_pipeline(llm):
+#     keyword_prompt_template = """
+# Your task is to convert the follwing question into 3 keywords that can be used to find relevant medical research papers on PubMed.
+# Here is an examples:
+# question: "What are the latest treatments for major depressive disorder?"
+# keywords:
+# Antidepressive Agents
+# Depressive Disorder, Major
+# Treatment-Resistant depression
+# ---
+# question: {{ question }}
+# keywords:
+# """
+#     keyword_prompt_builder = PromptBuilder(template=keyword_prompt_template)
+#     keyword_pipeline = Pipeline()
+#     keyword_pipeline.add_component("keyword_prompt_builder", keyword_prompt_builder)
+#     keyword_pipeline.add_component("keyword_llm", llm)
+#     return keyword_pipeline
+# def start_qa_pipeline(llm):
+#     return qa_pipeline
+def start_haystack(huggingface_token):
+    #Use this function to contruct a pipeline
+    llm = HuggingFaceTGIGenerator("mistralai/Mixtral-8x7B-Instruct-v0.1", token=huggingface_token)
+    llm.warm_up()
+    # start_keyword_pipeline(llm)
+    # start_qa_pipeline(llm)
+    keyword_prompt_template = """
+Your task is to convert the follwing question into 3 keywords that can be used to find relevant medical research papers on PubMed.
+Here is an examples:
+question: "What are the latest treatments for major depressive disorder?"
+keywords:
+Antidepressive Agents
+Depressive Disorder, Major
+Treatment-Resistant depression
+---
+question: {{ question }}
+keywords:
+"""
+    prompt_template = """
+Answer the question truthfully based on the given documents.
+If the documents don't contain an answer, use your existing knowledge base.
+q: {{ question }}
+Articles:
+{% for article in articles %}
+  {{article.content}}
+  keywords: {{article.meta['keywords']}}
+  title: {{article.meta['title']}}
+{% endfor %}
+"""
+    keyword_prompt_builder = PromptBuilder(template=keyword_prompt_template)
+    prompt_builder = PromptBuilder(template=prompt_template)
+    fetcher = PubMedFetcher()
+    pipe = Pipeline()
+    pipe.add_component("keyword_prompt_builder", keyword_prompt_builder)
+    pipe.add_component("keyword_llm", llm)
+    pipe.add_component("pubmed_fetcher", fetcher)
+    pipe.add_component("prompt_builder", prompt_builder)
+    pipe.add_component("llm", llm)
+    pipe.connect("keyword_prompt_builder.prompt", "keyword_llm.prompt")
+    pipe.connect("keyword_llm.replies", "pubmed_fetcher.queries")
+    pipe.connect("pubmed_fetcher.articles", "prompt_builder.articles")
+    pipe.connect("prompt_builder.prompt", "llm.prompt")
+    return pipe
+@st.cache_data(show_spinner=True)
+def query(query, _pipeline):
+    try:
+        result = _pipeline.run(data={"keyword_prompt_builder":{"question":query},
+                          "prompt_builder":{"question": query},
+                          "llm":{"generation_kwargs": {"max_new_tokens": 500}}})
+    except Exception as e:
+        result = ["Please make sure you are providing a correct, public Mastodon account"]
+    return result

utils/pubmed_fetcher.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from pymed import PubMed
+from typing import List
+from haystack import component
+from haystack import Document
+pubmed = PubMed(tool="Haystack2.0Prototype", email="tilde.thurium@deepset.ai")
+def documentize(article):
+  return Document(content=article.abstract, meta={'title': article.title, 'keywords': article.keywords})
+@component
+class PubMedFetcher():
+  @component.output_types(articles=List[Document])
+  def run(self, queries: list[str]):
+    cleaned_queries = queries[0].strip().split('\n')
+    articles = []
+    try:
+      for query in cleaned_queries:
+        response = pubmed.query(query, max_results = 1)
+        documents = [documentize(article) for article in response]
+        articles.extend(documents)
+    except Exception as e:
+        print(e)
+        print(f"Couldn't fetch articles for queries: {queries}" )
+    results = {'articles': articles}
+    return results

utils/ui.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import streamlit as st
+from PIL import Image
+def set_state_if_absent(key, value):
+    if key not in st.session_state:
+        st.session_state[key] = value
+def set_initial_state():
+    set_state_if_absent("question", "Ask a question")
+    set_state_if_absent("result", None)
+    set_state_if_absent("haystack_started", False)
+def reset_results(*args):
+    st.session_state.result = None
+def set_hf_api_key(api_key: str):
+    st.session_state["HUGGING_FACE_TOKEN"] = api_key
+def sidebar():
+    with st.sidebar:
+        image = Image.open('logo/haystack-logo-colored.png')
+        st.markdown(
+            "## How to use\n"
+            "1. Enter your Hugging Face TGI API key below\n"
+            "2. Ask a question\n"
+            "3. Enjoy 🤗\n"
+        )
+        api_key_input = st.text_input(
+            "Hugging Face TGI API Key",
+            type="password",
+            placeholder="Paste your Hugging Face TGI token here",
+            value=st.session_state.get("HUGGING_FACE_TOKEN", ""),
+        )
+        if api_key_input:
+            set_hf_api_key(api_key_input)
+        st.markdown("---")
+        st.markdown(
+            "## How this works\n"
+            "This app was built with [Haystack](https://haystack.deepset.ai) using the"
+            " [`PromptNode`](https://docs.haystack.deepset.ai/docs/prompt_node) and custom [`PromptTemplate`](https://docs.haystack.deepset.ai/docs/prompt_node#templates).\n\n"
+            " The source code is also on [GitHub](https://github.com/TuanaCelik/should-i-follow)"
+            " with instructions to run locally.\n"
+            "You can see how the `PromptNode` was set up [here](https://github.com/TuanaCelik/should-i-follow/blob/main/utils/haystack.py)")
+        st.markdown("---")
+        st.markdown("Made by [tuanacelik](https://twitter.com/tuanacelik)")
+        st.markdown("---")
+        st.markdown("""Thanks to [mmz_001](https://twitter.com/mm_sasmitha)
+                        for open sourcing [KnowledgeGPT](https://knowledgegpt.streamlit.app/) which helped me with this sidebar 🙏🏽""")
+        st.image(image, width=250)