Update copy_of_hw1.py
Browse files- copy_of_hw1.py +10 -1
copy_of_hw1.py
CHANGED
@@ -230,7 +230,6 @@ class BM25Index(InvertedIndex):
|
|
230 |
)
|
231 |
return index
|
232 |
|
233 |
-
|
234 |
from nlp4web_codebase.ir.models import BaseRetriever
|
235 |
from typing import Type
|
236 |
from abc import abstractmethod
|
@@ -293,3 +292,13 @@ class BM25Retriever(BaseInvertedIndexRetriever):
|
|
293 |
@property
|
294 |
def index_class(self) -> Type[BM25Index]:
|
295 |
return BM25Index
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
)
|
231 |
return index
|
232 |
|
|
|
233 |
from nlp4web_codebase.ir.models import BaseRetriever
|
234 |
from typing import Type
|
235 |
from abc import abstractmethod
|
|
|
292 |
@property
|
293 |
def index_class(self) -> Type[BM25Index]:
|
294 |
return BM25Index
|
295 |
+
|
296 |
+
best_b, best_k1 = 0.8, 0.5
|
297 |
+
bm25_index = BM25Index.build_from_documents(
|
298 |
+
documents=iter(sciq.corpus),
|
299 |
+
ndocs=12160,
|
300 |
+
show_progress_bar=True,
|
301 |
+
k1=best_k1,
|
302 |
+
b=best_b
|
303 |
+
)
|
304 |
+
bm25_index.save("output/bm25_index")
|