|
from gensim.models import KeyedVectors |
|
from typing import List, Dict |
|
|
|
|
|
class PreTrainedPipeline:
    """Callable wrapper that lists the nearest neighbours of a word.

    The word vectors are fetched from the ``lang-uk/word2vec-uk`` hub
    repository when the pipeline is constructed and kept on ``self.model``.
    """

    def __init__(self, path=""):
        """Download the vector file and load it into ``self.model``.

        Args:
            path: accepted for interface compatibility; this implementation
                does not read it.
        """
        # Imported locally, as in the original, so the dependency is only
        # required when a pipeline is actually instantiated.
        from huggingface_hub import hf_hub_download

        vectors_file = hf_hub_download(
            repo_id="lang-uk/word2vec-uk",
            filename="ubercorpus.cased.tokenized.300d",
        )
        # binary=False: the file is read in the textual word2vec format.
        self.model = KeyedVectors.load_word2vec_format(vectors_file, binary=False)

    def __call__(self, inputs: str) -> List[Dict]:
        """Return the 30 most similar entries for ``inputs`` as one text blob.

        Args:
            inputs (:obj:`str`):
                the query text; surrounding whitespace is stripped before
                it is passed to ``self.model.most_similar``.

        Return:
            A :obj:`list` holding a single :obj:`dict` whose
            ``"generated_text"`` value is the neighbours joined by
            ``", \\n\\n"``. Similarity scores are discarded.

        NOTE(review): nothing here catches errors from ``most_similar``;
        whatever it raises for an unknown query propagates to the caller.
        """
        query = inputs.strip()
        neighbours = self.model.most_similar(query, topn=30)

        # Each result is a (key, score) pair; only the key is rendered.
        rendered = [f"{word}" for word, _score in neighbours]
        separator = ", \n\n"
        return [{"generated_text": separator.join(rendered)}]
|
|