"""# TASK3: a search-engine demo based on a Hugging Face Space (4 points)

## TASK3.1: create the gradio app (2 points)

Create a gradio app to demo the BM25 search-engine index built on SciQ. The app should have a single input variable for the query (of type `str`) and a single output variable for the returned ranking (of type `List[Hit]` in the code below).
"""
|
import functools
from typing import TypedDict, Optional, List

import gradio as gr

import copy_of_hw1
from copy_of_hw1 import BM25Retriever
|
|
|
class Hit(TypedDict):
    """One entry of the ranking returned by the search demo."""

    # Identifier of the retrieved passage (a key of the retriever's result).
    cid: str
    # Value the retriever returned for this cid for the given query.
    score: float
    # Text of the passage, as stored in the index's doc_texts.
    text: str
|
|
|
# Placeholder so that `demo` is always defined at module level; it is rebound
# to the actual gr.Interface further down in this file.
demo: Optional[gr.Interface] = None

# Type of the ranking that the search function is expected to return.
return_type = List[Hit]
|
|
|
|
|
@functools.lru_cache(maxsize=1)
def _get_retriever() -> "BM25Retriever":
    """Return the shared BM25 retriever, creating it on first use.

    Building a retriever inside every request repeats whatever setup its
    constructor performs for "output/bm25_index"; a single lazily created
    instance is reused instead.
    """
    return BM25Retriever(index_dir="output/bm25_index")


def hits(query: str) -> "List[Hit]":
    """Search the BM25 index and return the ranking for *query*.

    Args:
        query: Free-text query typed by the user.

    Returns:
        One ``Hit`` (cid, score, passage text) per retrieved passage, in the
        iteration order of the retriever's result. A missing or blank query
        yields an empty list without touching the index.
    """
    # Nothing to search for: return an empty ranking instead of querying.
    if not query or not query.strip():
        return []

    retriever = _get_retriever()
    index = retriever.index
    ranking = retriever.retrieve(query)  # mapping of cid -> score
    return [
        # cid -> internal doc id -> stored passage text
        Hit(cid=cid, score=score, text=index.doc_texts[index.cid2docid[cid]])
        for cid, score in ranking.items()
    ]
|
|
|
# Single text input (the query) and a single output (the ranking) around
# `hits`. The ranking is a list of dicts, so it is exposed through the JSON
# component; a "text" output expects a string and would not deliver the
# ranking as structured data.
demo = gr.Interface(
    fn=hits,
    inputs=["text"],
    outputs=["json"],
)

# Start the web app.
demo.launch()