Spaces:

nouamanetazi
/

hf-search

Running

App Files Files Community

nouamanetazi HF staff committed on Feb 11, 2022

Commit

0e05863

•

1 Parent(s): fabd282

remove flask server

Browse files

Files changed (4) hide show

pages/__init__.py +0 -2
pages/document.py +0 -21
pages/search_engine.py +0 -157
server/api.py → st_utils.py +53 -33

pages/__init__.py DELETED Viewed

	@@ -1,2 +0,0 @@
1	- from .search_engine import page as search_engine_page
2	- from .document import page as document_page

pages/document.py DELETED Viewed

@@ -1,21 +0,0 @@
-import os
-import re
-import json
-import datetime
-import itertools
-import requests
-from PIL import Image
-import base64
-import streamlit as st
-def page():  # Render the detail view for the record held in st.session_state["selected_record"].
-    record = st.session_state.get("selected_record")  # None when no record has been selected
-    st.set_page_config(
-        page_title=f"Record {record['filename']}",  # NOTE(review): fails with TypeError when record is None
-        page_icon="👨‍⚕️",
-        layout="wide",
-        initial_sidebar_state="collapsed",
-    )
-    st.button("Back", on_click=lambda: set_record(None))  # NOTE(review): set_record is neither defined nor imported in the visible part of this file
-    st.write(record)

pages/search_engine.py DELETED Viewed

@@ -1,157 +0,0 @@
-import os
-import re
-import json
-import datetime
-import itertools
-import requests
-from PIL import Image
-import base64
-import streamlit as st
-from huggingface_hub import ModelSearchArguments
-import webbrowser
-from numerize.numerize import numerize
-def paginator(label, articles, articles_per_page=10, on_sidebar=True):
-    # https://gist.github.com/treuille/2ce0acb6697f205e44e3e0f576e810b7
-    """Lets the user paginate a set of articles.
-    Parameters
-    ----------
-    label : str
-        The label to display over the pagination widget.
-    articles : Iterable[Any]
-        The articles to display in the paginator.
-    articles_per_page: int
-        The number of articles to display per page.
-    on_sidebar: bool
-        Whether to display the paginator widget on the sidebar.
-    Returns
-    -------
-    Iterator[Tuple[int, Any]]
-        An iterator over *only the articles on the selected page*, each
-        paired with its index in the full list.
-    """
-    # Figure out where to display the paginator
-    if on_sidebar:
-        location = st.sidebar.empty()
-    else:
-        location = st.empty()
-    # Display a pagination selectbox in the specified location.
-    articles = list(articles)
-    n_pages = (len(articles) - 1) // articles_per_page + 1
-    page_format_func = lambda i: f"Results {i*10} to {i*10 +10 -1}"  # NOTE(review): label assumes 10 per page regardless of articles_per_page
-    page_number = location.selectbox(label, range(n_pages), format_func=page_format_func)
-    # Iterate over the articles in the page to let the user display them.
-    min_index = page_number * articles_per_page
-    max_index = min_index + articles_per_page
-    return itertools.islice(enumerate(articles), min_index, max_index)
-def page():  # Render the search page: sidebar controls, query box, and paginated results.
-    ### SIDEBAR
-    search_backend = st.sidebar.selectbox(
-        "Search method",
-        ["semantic", "bm25", "hfapi"],
-        format_func=lambda x: {"hfapi": "Keyword search", "bm25": "BM25 search", "semantic": "Semantic Search"}[x],
-    )
-    limit_results = st.sidebar.number_input("Limit results", min_value=0, value=10)
-    st.sidebar.markdown("# Filters")
-    args = ModelSearchArguments()
-    library = st.sidebar.multiselect(
-        "Library", args.library.values(), format_func=lambda x: {v: k for k, v in args.library.items()}[x]  # inverts the mapping on every call to show the key as the label
-    )
-    task = st.sidebar.multiselect(
-        "Task", args.pipeline_tag.values(), format_func=lambda x: {v: k for k, v in args.pipeline_tag.items()}[x]  # same value-to-key label lookup as for Library
-    )
-    ### MAIN PAGE
-    st.markdown(
-        "<h1 style='text-align: center; '>🔎🤗 HF Search Engine</h1>",
-        unsafe_allow_html=True,
-    )
-    # Search bar
-    search_query = st.text_input(
-        "Search for a model in HuggingFace", value="", max_chars=None, key=None, type="default"
-    )
-    # Search API
-    endpoint = "http://localhost:5000"  # base URL the search requests are posted to
-    headers = {
-        "Content-Type": "application/json",
-        "api-key": "password",  # NOTE(review): credential value is hard-coded in source
-    }
-    search_url = f"{endpoint}/{search_backend}/search"  # one route per selected backend
-    filters = {
-        "library": library,
-        "task": task,
-    }
-    search_body = {
-        "query": search_query,
-        "filters": json.dumps(filters, default=str),  # filters travel as a JSON-encoded string inside the JSON body
-        "limit": limit_results,
-    }
-    if search_query != "":  # nothing is requested until the user types a query
-        response = requests.post(search_url, headers=headers, json=search_body).json()  # NOTE(review): no timeout and no HTTP status check before decoding
-        hit_list = []
-        _ = [  # comprehension is used only for its append side effect; its result is discarded
-            hit_list.append(
-                {
-                    "modelId": hit["modelId"],
-                    "tags": hit["tags"],
-                    "downloads": hit["downloads"],
-                    "likes": hit["likes"],
-                    "readme": hit.get("readme", None),
-                }
-            )
-            for hit in response.get("value")
-        ]
-        if hit_list:
-            st.write(f'Search results ({response.get("count")}):')
-            if response.get("count") > 100:  # the "showing N" figure in the label is capped at 100
-                shown_results = 100
-            else:
-                shown_results = response.get("count")
-            for i, hit in paginator(  # default paginator settings: 10 per page, widget on the sidebar
-                f"Select results (showing {shown_results} of {response.get('count')} results)",
-                hit_list,
-            ):
-                col1, col2, col3 = st.columns([5,1,1])
-                col1.metric("Model", hit["modelId"])
-                col2.metric("N° downloads", numerize(hit["downloads"]))
-                col3.metric("N° likes", numerize(hit["likes"]))
-                st.button(f"View model on 🤗", on_click=lambda hit=hit: webbrowser.open(f"https://huggingface.co/{hit['modelId']}"), key=hit["modelId"])  # hit=hit binds the current hit; NOTE(review): webbrowser.open presumably acts on the machine running the script, not the viewer's - confirm
-                st.write(f"**Tags:** {'&nbsp;&nbsp;•&nbsp;&nbsp;'.join(hit['tags'])}")
-                if hit["readme"]:
-                    with st.expander("See README"):
-                        st.write(hit["readme"])
-                # TODO: embed huggingface spaces
-                #                 import streamlit.components.v1 as components
-                #                 components.html(
-                #     f"""
-                #     <link rel="stylesheet" href="https://gradio.s3-us-west-2.amazonaws.com/2.6.2/static/bundle.css">
-                # <div id="target"></div>
-                # <script src="https://gradio.s3-us-west-2.amazonaws.com/2.6.2/static/bundle.js"></script>
-                # <script>
-                # launchGradioFromSpaces("abidlabs/question-answering", "#target")
-                # </script>
-                #     """,
-                #     height=400,
-                # )
-                st.markdown("---")
-        else:
-            st.write(f"No Search results, please try again with different keywords")

server/api.py → st_utils.py RENAMED Viewed

@@ -1,23 +1,12 @@
-from flask import Flask, request
 import json
 from huggingface_hub import HfApi, ModelFilter, DatasetFilter, ModelSearchArguments
 from pprint import pprint
 from hf_search import hf_search
-app = Flask(__name__)
-@app.route("/hello")
-def hello():
-    return "<h1 style='color:blue'>Hello There!</h1>"
-@app.route("/hfapi/search", methods=["POST"])
-def hf_api():
-    request_data = request.get_json()
-    query = request_data.get("query")
-    filters = json.loads(request_data.get("filters"))
-    limit = request_data.get("limit", 5)
     print("query", query)
     print("filters", filters)
     print("limit", limit)
@@ -43,15 +32,11 @@ def hf_api():
     if len(hits) > limit:
         hits = hits[:limit]
     pprint(hits)
-    return json.dumps({"value": hits, "count": count})
-@app.route("/semantic/search", methods=["POST"])
-def semantic_search():
-    request_data = request.get_json()
-    query = request_data.get("query")
-    filters = json.loads(request_data.get("filters"))
-    limit = request_data.get("limit", 5)
     print("query", query)
     print("filters", filters)
     print("limit", limit)
@@ -67,14 +52,11 @@ def semantic_search():
         }
         for hit in hits
     ]
-    return json.dumps({"value": hits, "count": len(hits)})
-@app.route("/bm25/search", methods=["POST"])
-def bm25_search():
-    request_data = request.get_json()
-    query = request_data.get("query")
-    filters = json.loads(request_data.get("filters"))
-    limit = request_data.get("limit", 5)
     print("query", query)
     print("filters", filters)
     print("limit", limit)
@@ -91,9 +73,47 @@ def bm25_search():
         }
         for hit in hits
     ]
-    hits = [hits[i] for i in range(len(hits)) if hits[i]["modelId"] not in [h["modelId"] for h in hits[:i]]] # unique hits
-    return json.dumps({"value": hits, "count": len(hits)})
-if __name__ == "__main__":
-    app.run(host="localhost", port=5000)

 import json
 from huggingface_hub import HfApi, ModelFilter, DatasetFilter, ModelSearchArguments
 from pprint import pprint
 from hf_search import hf_search
+import streamlit as st
+import itertools
+@st.cache
+def hf_api(query, limit=5, filters={}):
     print("query", query)
     print("filters", filters)
     print("limit", limit)
     if len(hits) > limit:
         hits = hits[:limit]
     pprint(hits)
+    return {"hits": hits, "count": count}
+@st.cache
+def semantic_search(query, limit=5, filters={}):
     print("query", query)
     print("filters", filters)
     print("limit", limit)
         }
         for hit in hits
     ]
+    return {"hits": hits, "count": len(hits)}
+@st.cache
+def bm25_search(query, limit=5, filters={}):
     print("query", query)
     print("filters", filters)
     print("limit", limit)
         }
         for hit in hits
     ]
+    hits = [
+        hits[i] for i in range(len(hits)) if hits[i]["modelId"] not in [h["modelId"] for h in hits[:i]]
+    ]  # unique hits
+    return {"hits": hits, "count": len(hits)}
+def paginator(label, articles, articles_per_page=10, on_sidebar=True):
+    # https://gist.github.com/treuille/2ce0acb6697f205e44e3e0f576e810b7
+    """Lets the user paginate a set of article.
+    Parameters
+    ----------
+    label : str
+        The label to display over the pagination widget.
+    article : Iterator[Any]
+        The articles to display in the paginator.
+    articles_per_page: int
+        The number of articles to display per page.
+    on_sidebar: bool
+        Whether to display the paginator widget on the sidebar.
+    Returns
+    -------
+    Iterator[Tuple[int, Any]]
+        An iterator over *only the article on that page*, including
+        the item's index.
+    """
+    # Figure out where to display the paginator
+    if on_sidebar:
+        location = st.sidebar.empty()
+    else:
+        location = st.empty()
+    # Display a pagination selectbox in the specified location.
+    articles = list(articles)
+    n_pages = (len(articles) - 1) // articles_per_page + 1
+    page_format_func = lambda i: f"Results {i*10} to {i*10 +10 -1}"
+    page_number = location.selectbox(label, range(n_pages), format_func=page_format_func)
+    # Iterate over the articles in the page to let the user display them.
+    min_index = page_number * articles_per_page
+    max_index = min_index + articles_per_page
+    return itertools.islice(enumerate(articles), min_index, max_index)