Spaces:
Runtime error
Runtime error
Try the trie binary version of the KenLM model first; if it is not found, fall back to the hash table binary version
Browse files
perplexity_lenses/perplexity.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
import re
|
3 |
import unicodedata
|
4 |
from typing import Dict
|
|
|
5 |
|
6 |
import kenlm
|
7 |
import sentencepiece
|
@@ -178,10 +179,16 @@ class KenlmModel:
|
|
178 |
return self.non_printing_chars_re.sub("", text)
|
179 |
|
180 |
def download_kenlm_model(self, model_dataset: str, language: str):
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
sentence_piece_model_url = hf_hub_url(
|
186 |
KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.sp.model"
|
187 |
)
|
|
|
2 |
import re
|
3 |
import unicodedata
|
4 |
from typing import Dict
|
5 |
+
from requests.exceptions import HTTPError
|
6 |
|
7 |
import kenlm
|
8 |
import sentencepiece
|
|
|
179 |
return self.non_printing_chars_re.sub("", text)
|
180 |
|
181 |
def download_kenlm_model(self, model_dataset: str, language: str):
|
182 |
+
try:
|
183 |
+
kenlm_model_url = hf_hub_url(
|
184 |
+
KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.arpa.trie.bin"
|
185 |
+
)
|
186 |
+
self.kenlm_model_dir = cached_download(kenlm_model_url)
|
187 |
+
except HTTPError:
|
188 |
+
kenlm_model_url = hf_hub_url(
|
189 |
+
KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.arpa.bin"
|
190 |
+
)
|
191 |
+
self.kenlm_model_dir = cached_download(kenlm_model_url)
|
192 |
sentence_piece_model_url = hf_hub_url(
|
193 |
KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.sp.model"
|
194 |
)
|