File size: 720 Bytes
53a9a92 f49bb16 53a9a92 3c45d75 f49bb16 b30bec4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
from gensim.models import KeyedVectors
from typing import List, Dict
class PreTrainedPipeline:
    """Callable pipeline that lists the nearest neighbours of a word.

    The word vectors are loaded once, at construction time, from the
    ``lang-uk/word2vec-uk`` repository on the Hugging Face Hub.
    """

    def __init__(self, path=""):
        """
        Download the vector file and load it into ``self.model``.

        Args:
            path (:obj:`str`):
                Accepted for interface compatibility; this implementation
                does not read it.
        """
        # Imported locally, so huggingface_hub is only required when a
        # pipeline is actually constructed.
        from huggingface_hub import hf_hub_download

        vectors_file = hf_hub_download(
            repo_id="lang-uk/word2vec-uk",
            filename="ubercorpus.cased.tokenized.300d",
        )
        # binary=False: the file is parsed as text-format word2vec.
        self.model = KeyedVectors.load_word2vec_format(vectors_file, binary=False)

    def __call__(self, inputs: str) -> List[Dict]:
        """
        Args:
            inputs (:obj:`str`):
                a string containing the word to look up; surrounding
                whitespace is ignored
        Return:
            A :obj:`list` holding a single :obj:`dict` whose
            ``"generated_text"`` value is the up-to-30 most similar keys
            joined with ``", \\n\\n"``. The value is an empty string when
            the input is blank or the word is not known to the model.
        """
        word = inputs.strip()
        if not word:
            # Nothing to look up; skip the model call entirely.
            return [{"generated_text": ""}]

        try:
            neighbours = self.model.most_similar(word, topn=30)
        except KeyError:
            # The model signals an unknown key with KeyError; answer with an
            # empty result in the usual response shape instead of crashing.
            return [{"generated_text": ""}]

        # Each neighbour is a (key, score) pair; only the key is shown.
        return [{"generated_text": ", \n\n".join(str(key) for key, _ in neighbours)}]
|