|
--- |
|
tags: |
|
- word2vec |
|
language: oc |
|
license: gpl-3.0 |
|
--- |
|
|
|
## Description |
|
Polyglot word embedding model trained by Al-Rfou et al. (2013). |
|
|
|
|
|
## How to use? |
|
|
|
``` |
|
import pickle

from numpy import dot
from numpy.linalg import norm
from huggingface_hub import hf_hub_download

# Usage example: load the (words, embeddings) pair from the Hub and find the
# vocabulary entry whose vector has the highest cosine similarity to `word`.

# NOTE(review): this card is tagged `language: oc`, yet the snippet fetches the
# `_en` repository/file and queries an English word -- confirm the intended
# repo_id, filename and example word.
pickle_path = hf_hub_download(
    repo_id="Word2vec/polyglot_words_embeddings_en",
    filename="words_embeddings_en.pkl",
)

# SECURITY: unpickling can execute arbitrary code; only load this file from a
# source you trust. The `with` block closes the file handle once loading is done.
with open(pickle_path, "rb") as pickle_file:
    words, embeddings = pickle.load(pickle_file, encoding="latin1")

word = "Irish"

# Look the query word up once; calling `words.index` inside the loop would
# rescan the vocabulary on every iteration.
word_index = words.index(word)  # raises ValueError if `word` is not in `words`
a = embeddings[word_index]
norm_a = norm(a)

most_similar = []
for i, b in enumerate(embeddings):
    if i == word_index:
        # Exclude the query word itself. -inf (rather than 0) keeps it from
        # winning even when every other cosine similarity is negative.
        most_similar.append(float("-inf"))
    else:
        most_similar.append(dot(a, b) / (norm_a * norm(b)))

# Nearest neighbour of `word`. As a bare expression this is displayed in a
# notebook/REPL; wrap it in print() when running as a script.
words[most_similar.index(max(most_similar))]
|
``` |
|
|
|
## Citation |
|
|
|
``` |
|
@InProceedings{polyglot:2013:ACL-CoNLL, |
|
author = {Al-Rfou, Rami and Perozzi, Bryan and Skiena, Steven}, |
|
title = {Polyglot: Distributed Word Representations for Multilingual NLP}, |
|
booktitle = {Proceedings of the Seventeenth Conference on Computational Natural Language Learning}, |
|
month = {August}, |
|
year = {2013}, |
|
address = {Sofia, Bulgaria}, |
|
publisher = {Association for Computational Linguistics}, |
|
pages = {183--192}, |
|
url = {http://www.aclweb.org/anthology/W13-3520} |
|
} |
|
``` |
|
|