zhichao-geng committed
Commit
6d98da9
1 Parent(s): bc4bee5

Update README.md

Files changed (1)
  1. README.md +2 -3
README.md CHANGED
@@ -40,7 +40,6 @@ import itertools
 import torch
 
 from transformers import AutoModelForMaskedLM, AutoTokenizer
-from transformers.utils import cached_path,hf_bucket_url
 
 
 # get sparse vector from dense vectors with shape batch_size * seq_len * vocab_size
@@ -67,8 +66,8 @@ def transform_sparse_vector_to_dict(sparse_vector):
 
 # download the idf file from model hub. idf is used to give weights for query tokens
 def get_tokenizer_idf(tokenizer):
-    url = hf_bucket_url("opensearch-project/opensearch-neural-sparse-encoding-doc-v1","idf.json")
-    local_cached_path = cached_path(url)
+    from huggingface_hub import hf_hub_download
+    local_cached_path = hf_hub_download(repo_id="opensearch-project/opensearch-neural-sparse-encoding-doc-v1", filename="idf.json")
     with open(local_cached_path) as f:
        idf = json.load(f)
     idf_vector = [0]*tokenizer.vocab_size
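For context on the change: `hf_bucket_url` and `cached_path` were deprecated and later removed from `transformers.utils`, so the README's download step now goes through `huggingface_hub.hf_hub_download`, which resolves a file inside a Hub repo and returns a path into the local cache (downloading only when the file is not already cached). Below is a minimal, self-contained sketch of just the updated download step; the assumption that `idf.json` holds a token-to-weight mapping follows from how the README consumes it, but is not spelled out in this hunk.

```python
import json

from huggingface_hub import hf_hub_download

# Fetch idf.json from the model repo, or reuse the locally cached copy.
local_cached_path = hf_hub_download(
    repo_id="opensearch-project/opensearch-neural-sparse-encoding-doc-v1",
    filename="idf.json",
)

with open(local_cached_path) as f:
    idf = json.load(f)  # assumed: mapping of token -> idf weight

print(f"cached at: {local_cached_path}")
print(f"loaded {len(idf)} idf entries")
```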