Spaces:
Runtime error
Runtime error
File size: 2,112 Bytes
f10968e 5f3cfd3 f10968e 5f3cfd3 f10968e 5f3cfd3 f10968e 5f3cfd3 f10968e 5f3cfd3 f10968e 5f3cfd3 f10968e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
from gensim.models import Word2Vec
from collections import defaultdict
def load_word2vec_model(model_path):
    '''
    Read a previously saved Word2Vec model from disk.

    model_path: location of the saved model, passed straight to
        Word2Vec.load.
    Returns the object produced by Word2Vec.load for that path.
    '''
    loaded_model = Word2Vec.load(model_path)
    return loaded_model
def get_word_vector(model, word):
    '''
    Look up the vector stored for a single word.

    model: object exposing a `wv` attribute that can be indexed by word.
    word: the key to look up.
    Returns model.wv[word]; any lookup error raised by `wv` propagates.
    '''
    keyed_vectors = model.wv
    return keyed_vectors[word]
def iterate_over_words(model):
    '''
    Print every vocabulary word together with its index and vector.

    model: object whose `wv` attribute exposes a `key_to_index` mapping
        (word -> index) and supports `wv[word]` lookup.

    One line is printed per word, in the iteration order of
    `key_to_index`. Nothing is returned.
    '''
    keyed_vectors = model.wv
    # The printed index is the value stored in key_to_index, so no separate
    # counter needs to be maintained alongside the loop.
    for word, index in keyed_vectors.key_to_index.items():
        print(f'{index} Word: {word}, Vector: {keyed_vectors[word]}')
def model_dictionary(model):
    '''
    Return the vocabulary of the word2vec model as a mapping.
    Key is the word and value is the vector of the word.

    model: object whose `wv` attribute exposes a `key_to_index` mapping
        and supports `wv[word]` lookup.

    Returns a defaultdict(list) holding one entry per vocabulary word.
    Because the result is a defaultdict, looking up a word that is not in
    the vocabulary yields (and stores) an empty list instead of raising
    KeyError; use `word in result` to test for membership first.
    '''
    keyed_vectors = model.wv
    # Named word_vectors rather than `dict` so the builtin stays usable.
    word_vectors = defaultdict(list)
    # Only the words are needed here; the stored indices are not used.
    for word in keyed_vectors.key_to_index:
        word_vectors[word] = keyed_vectors[word]
    return word_vectors
def dot_product(vector_a, vector_b):
    '''
    Compute the dot product of two vectors.

    Components are paired positionally with zip, so if the inputs differ
    in length the extra trailing components of the longer one are ignored.
    Two empty inputs give 0.
    '''
    pairwise_products = (x * y for x, y in zip(vector_a, vector_b))
    return sum(pairwise_products)
def magnitude(vector):
    '''
    Compute the Euclidean length of a vector.

    The result is the square root of the sum of squared components; an
    empty vector has length 0.0.
    '''
    sum_of_squares = sum(component ** 2 for component in vector)
    return sum_of_squares ** 0.5
def cosine_similarity(vector_a, vector_b):
    '''
    Compute the cosine similarity between two vectors.

    The dot product of the inputs is divided by the product of their
    magnitudes. If either magnitude is zero the similarity is reported
    as 0.0 instead of dividing by zero.
    '''
    numerator = dot_product(vector_a, vector_b)
    length_a = magnitude(vector_a)
    length_b = magnitude(vector_b)
    # Only divide when both vectors have non-zero length.
    if length_a != 0 and length_b != 0:
        return numerator / (length_a * length_b)
    return 0.0
def main():
    '''
    Load the archaic CBOW model and print the cosine similarity between
    the vectors of two fixed words.
    '''
    cbow_model = load_word2vec_model('../models/archaic_cbow.model')
    vectors_by_word = model_dictionary(cbow_model)
    first_vector = vectors_by_word['Πελοπόννησος']
    second_vector = vectors_by_word['σπάργανον']
    print(cosine_similarity(first_vector, second_vector))
    # Other ways to inspect the model, kept for reference:
    # vector = get_word_vector(cbow_model, 'ἀνήρ')
    # print(vector)
    # Iterate over all words and print their vectors
    # iterate_over_words(cbow_model)
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|