File size: 820 Bytes
961c631
 
 
 
 
50b2767
 
961c631
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from pyserini.search.lucene import LuceneSearcher
from transformers import Tool
import json


# Module-level searcher, created once when this module is imported and used
# by search() below. It is built from the prebuilt index registered under the
# name 'wikipedia-kilt-doc'.
# NOTE(review): presumably this fetches the index on first use if it is not
# already available locally -- confirm against the Pyserini documentation.
searcher = LuceneSearcher.from_prebuilt_index('wikipedia-kilt-doc')
# Alternative kept for reference: build the searcher directly from what looks
# like a local index directory name instead of a prebuilt index name.
# searcher = LuceneSearcher('index-wikipedia-kilt-doc-20210421-f29307.b8ec8feb654f7aaa86f9901dc6c804a8')



def search(query):
    """Return the contents of the best-matching document for *query*.

    Runs *query* against the module-level ``searcher`` asking for a single
    hit, parses that hit's raw stored document as JSON, and returns its
    ``'contents'`` field.

    Args:
        query: Query passed unchanged to ``searcher.search``.

    Returns:
        The ``'contents'`` value of the top hit, or an empty string when the
        search returns no hits.
    """
    hits = searcher.search(query, k=1)

    # A query can legitimately match nothing; indexing hits[0] in that case
    # would raise IndexError, so report "no content" instead of crashing.
    if not hits:
        return ""

    return json.loads(hits[0].raw)['contents']

class PyseriniWikipediaKiltDoc(Tool):
    """Tool that looks a query up in the Wikipedia KILT index.

    Calling the tool delegates to the module-level ``search`` function and
    returns whatever that function returns.
    """

    name = "pyserini-wikipedia-kilt-doc"
    # The description has to match the actual behavior: search() requests a
    # single hit (k=1) and returns only that hit's contents, not five results.
    description = (
        "This is a tool that searches the Wikipedia KILT index. It takes a "
        "query as input and returns the contents of the top result."
    )

    # One text input (the query) and one text output.
    inputs = ["text"]
    outputs = ["text"]

    def __call__(self, query: str):
        """Search for *query* and return the result of ``search(query)``."""
        return search(query)