# Hugging Face Space (page status when captured: Sleeping)
import functools
import os

import gradio as gr
from pyserini.search.lucene import LuceneSearcher
# Best-effort warm-up: fetch the prebuilt index at startup so the first query
# does not pay for the download. Pyserini stores prebuilt indexes in its own
# cache directory (not in ./msmarco-passage) and skips the download when the
# index is already cached, so calling this on every start is cheap.
# The call is made in-process instead of shelling out to a second interpreter:
# `python` on PATH is not guaranteed to be the interpreter running this app,
# and os.system() silently discarded any failure.
if not os.path.exists('msmarco-passage'):
    try:
        LuceneSearcher.from_prebuilt_index('msmarco-passage')
    except Exception as exc:
        # Keep the original best-effort semantics: a failed warm-up must not
        # prevent the UI from starting; the error is surfaced per query.
        print(f"Could not pre-download the 'msmarco-passage' index: {exc}")
_INDEX_NAME = "msmarco-passage"
_TOP_K = 10            # number of hits shown per query
_SNIPPET_CHARS = 200   # longer passages are truncated to this many characters


@functools.lru_cache(maxsize=1)
def _get_searcher():
    """Open the passage index once and reuse it for every query.

    A local directory named ``msmarco-passage`` is used when present;
    otherwise the prebuilt index of the same name is loaded (and downloaded
    on first use) through Pyserini. Failures are not cached, so the next
    query retries.

    Raises:
        RuntimeError: if Pyserini cannot provide the prebuilt index.
    """
    if os.path.isdir(_INDEX_NAME):
        searcher = LuceneSearcher(_INDEX_NAME)
    else:
        searcher = LuceneSearcher.from_prebuilt_index(_INDEX_NAME)
        if searcher is None:
            raise RuntimeError(
                f"prebuilt index '{_INDEX_NAME}' could not be loaded"
            )
    searcher.set_bm25(k1=0.9, b=0.4)
    return searcher


def _snippet(text):
    """Return *text* cut to ``_SNIPPET_CHARS`` characters, with '...' if cut."""
    if len(text) > _SNIPPET_CHARS:
        return text[:_SNIPPET_CHARS] + "..."
    return text


def search_pyserini(query):
    """Run a BM25 search and format the top hits as plain text.

    Args:
        query: Free-text query typed by the user.

    Returns:
        One "Rank / Doc ID / Score / Content" paragraph per hit, joined by
        newlines; a short prompt when the query is blank; a "no results"
        notice when nothing matches; or "An error occurred: ..." when the
        search fails. The function never raises, so the UI always gets a
        string to display.
    """
    if not query or not query.strip():
        return "Please enter a search query."
    try:
        searcher = _get_searcher()
        hits = searcher.search(query, k=_TOP_K)
        results = []
        for rank, hit in enumerate(hits, start=1):
            # Fetch the stored document once; raw() is the full passage text.
            content = _snippet(searcher.doc(hit.docid).raw())
            results.append(
                f"Rank: {rank}\nDoc ID: {hit.docid}\n"
                f"Score: {hit.score:.4f}\nContent: {content}\n"
            )
        if not results:
            return "No results found."
        return "\n".join(results)
    except Exception as e:
        # UI boundary: report the failure in the output box instead of raising.
        return f"An error occurred: {e}"
# Custom stylesheet passed to gr.Blocks: a sans-serif page font, and a
# monospace, whitespace-preserving box for elements with class "output-text".
css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.output-text {
    white-space: pre-wrap;
    font-family: 'Courier New', monospace;
    font-size: 14px;
    line-height: 1.5;
    padding: 10px;
    border: 1px solid #ccc;
    border-radius: 5px;
    background-color: #f9f9f9;
}
"""
# Page layout: title and description, a one-line query box, a search button,
# and a read-out box styled by the "output-text" CSS class.
with gr.Blocks(css=css) as iface:
    gr.Markdown("# Pyserini Search Interface")
    gr.Markdown("Enter a query to search using Pyserini with BM25 scoring (k1=0.9, b=0.4).")
    with gr.Row():
        query_input = gr.Textbox(
            lines=1,
            placeholder="Enter your search query here...",
            label="Search Query",
        )
    with gr.Row():
        search_button = gr.Button("Search", variant="primary")
    with gr.Row():
        output = gr.Textbox(
            lines=20,
            label="Search Results",
            elem_classes=["output-text"],
        )
    # Event listeners must be registered inside the Blocks context.
    search_button.click(
        fn=search_pyserini,
        inputs=query_input,
        outputs=output,
    )
    # Pressing Enter in the query box runs the same search as the button.
    query_input.submit(
        fn=search_pyserini,
        inputs=query_input,
        outputs=output,
    )

# Start the server only when run as a script, so importing this module
# (e.g. for reuse or inspection of `iface`) does not block on launch().
if __name__ == "__main__":
    iface.launch()